feat: 实现 @ant/computer-use-input — macOS 键鼠模拟
使用 AppleScript + JXA (JavaScript for Automation) 实现完整 API: - moveMouse: CGEvent 鼠标移动 - key/keys: System Events 键盘输入(支持修饰键组合) - mouseLocation: CGEvent 查询当前鼠标位置 - mouseButton: CGEvent 鼠标点击/按下/释放 - mouseScroll: CGEvent 滚轮事件 - typeText: System Events 文本输入 - getFrontmostAppInfo: 获取前台应用 bundleId + 名称 兼容 require() 调用方式(所有方法作为命名导出)。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
30e863c9b8
commit
975b4876cc
6
TODO.md
6
TODO.md
@ -11,9 +11,9 @@
|
||||
- [x] `image-processor-napi` — 图像处理 NAPI 模块 (sharp + osascript 剪贴板)
|
||||
|
||||
<!-- - [ ] `@ant/computer-use-swift` — Computer Use Swift 原生模块
|
||||
- [ ] `@ant/computer-use-mcp` — Computer Use MCP 服务
|
||||
- [ ] `@ant/computer-use-input` — Computer Use 输入模块
|
||||
- [ ] `@ant/claude-for-chrome-mcp` — Chrome MCP 扩展 -->
|
||||
- [ ] `@ant/computer-use-mcp` — Computer Use MCP 服务 -->
|
||||
- [x] `@ant/computer-use-input` — Computer Use 输入模块 (macOS AppleScript/JXA 实现)
|
||||
<!-- - [ ] `@ant/claude-for-chrome-mcp` — Chrome MCP 扩展 -->
|
||||
|
||||
## 工程化能力
|
||||
|
||||
|
||||
@ -2,7 +2,6 @@
|
||||
"name": "@ant/computer-use-input",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"main": "./src/index.ts",
|
||||
"types": "./src/index.ts"
|
||||
}
|
||||
|
||||
@ -1,39 +1,174 @@
|
||||
/**
|
||||
* @ant/computer-use-input — macOS 键鼠模拟实现
|
||||
*
|
||||
* 使用 macOS 原生工具实现:
|
||||
* - AppleScript (osascript) — 应用信息、键盘输入
|
||||
* - CGEvent via AppleScript-ObjC bridge — 鼠标操作、位置查询
|
||||
*
|
||||
* 仅 macOS 支持。其他平台返回 { isSupported: false }
|
||||
*/
|
||||
|
||||
import { $ } from 'bun'
|
||||
|
||||
interface FrontmostAppInfo {
|
||||
bundleId: string
|
||||
appName: string
|
||||
}
|
||||
|
||||
// AppleScript key code mapping
|
||||
const KEY_MAP: Record<string, number> = {
|
||||
return: 36, enter: 36, tab: 48, space: 49, delete: 51, backspace: 51,
|
||||
escape: 53, esc: 53,
|
||||
left: 123, right: 124, down: 125, up: 126,
|
||||
f1: 122, f2: 120, f3: 99, f4: 118, f5: 96, f6: 97,
|
||||
f7: 98, f8: 100, f9: 101, f10: 109, f11: 103, f12: 111,
|
||||
home: 115, end: 119, pageup: 116, pagedown: 121,
|
||||
}
|
||||
|
||||
const MODIFIER_MAP: Record<string, string> = {
|
||||
command: 'command down', cmd: 'command down', meta: 'command down', super: 'command down',
|
||||
shift: 'shift down',
|
||||
option: 'option down', alt: 'option down',
|
||||
control: 'control down', ctrl: 'control down',
|
||||
}
|
||||
|
||||
async function osascript(script: string): Promise<string> {
|
||||
const result = await $`osascript -e ${script}`.quiet().nothrow().text()
|
||||
return result.trim()
|
||||
}
|
||||
|
||||
async function jxa(script: string): Promise<string> {
|
||||
const result = await $`osascript -l JavaScript -e ${script}`.quiet().nothrow().text()
|
||||
return result.trim()
|
||||
}
|
||||
|
||||
function jxaSync(script: string): string {
|
||||
const result = Bun.spawnSync({
|
||||
cmd: ['osascript', '-l', 'JavaScript', '-e', script],
|
||||
stdout: 'pipe', stderr: 'pipe',
|
||||
})
|
||||
return new TextDecoder().decode(result.stdout).trim()
|
||||
}
|
||||
|
||||
function buildMouseJxa(eventType: string, x: number, y: number, btn: number, clickState?: number): string {
|
||||
let script = `ObjC.import("CoreGraphics"); var p = $.CGPointMake(${x},${y}); var e = $.CGEventCreateMouseEvent(null, $.${eventType}, p, ${btn});`
|
||||
if (clickState !== undefined) {
|
||||
script += ` $.CGEventSetIntegerValueField(e, $.kCGMouseEventClickState, ${clickState});`
|
||||
}
|
||||
script += ` $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
return script
|
||||
}
|
||||
|
||||
// ---- Implementation functions ----
|
||||
|
||||
async function moveMouse(x: number, y: number, _animated: boolean): Promise<void> {
|
||||
await jxa(buildMouseJxa('kCGEventMouseMoved', x, y, 0))
|
||||
}
|
||||
|
||||
async function key(keyName: string, action: 'press' | 'release'): Promise<void> {
|
||||
if (action === 'release') return
|
||||
const lower = keyName.toLowerCase()
|
||||
const keyCode = KEY_MAP[lower]
|
||||
if (keyCode !== undefined) {
|
||||
await osascript(`tell application "System Events" to key code ${keyCode}`)
|
||||
} else {
|
||||
await osascript(`tell application "System Events" to keystroke "${keyName.length === 1 ? keyName : lower}"`)
|
||||
}
|
||||
}
|
||||
|
||||
async function keys(parts: string[]): Promise<void> {
|
||||
const modifiers: string[] = []
|
||||
let finalKey: string | null = null
|
||||
for (const part of parts) {
|
||||
const mod = MODIFIER_MAP[part.toLowerCase()]
|
||||
if (mod) modifiers.push(mod)
|
||||
else finalKey = part
|
||||
}
|
||||
if (!finalKey) return
|
||||
const lower = finalKey.toLowerCase()
|
||||
const keyCode = KEY_MAP[lower]
|
||||
const modStr = modifiers.length > 0 ? ` using {${modifiers.join(', ')}}` : ''
|
||||
if (keyCode !== undefined) {
|
||||
await osascript(`tell application "System Events" to key code ${keyCode}${modStr}`)
|
||||
} else {
|
||||
await osascript(`tell application "System Events" to keystroke "${finalKey.length === 1 ? finalKey : lower}"${modStr}`)
|
||||
}
|
||||
}
|
||||
|
||||
async function mouseLocation(): Promise<{ x: number; y: number }> {
|
||||
const result = await jxa('ObjC.import("CoreGraphics"); var e = $.CGEventCreate(null); var p = $.CGEventGetLocation(e); p.x + "," + p.y')
|
||||
const [xStr, yStr] = result.split(',')
|
||||
return { x: Math.round(Number(xStr)), y: Math.round(Number(yStr)) }
|
||||
}
|
||||
|
||||
async function mouseButton(
|
||||
button: 'left' | 'right' | 'middle',
|
||||
action: 'click' | 'press' | 'release',
|
||||
count?: number,
|
||||
): Promise<void> {
|
||||
const pos = await mouseLocation()
|
||||
const btn = button === 'left' ? 0 : button === 'right' ? 1 : 2
|
||||
const downType = btn === 0 ? 'kCGEventLeftMouseDown' : btn === 1 ? 'kCGEventRightMouseDown' : 'kCGEventOtherMouseDown'
|
||||
const upType = btn === 0 ? 'kCGEventLeftMouseUp' : btn === 1 ? 'kCGEventRightMouseUp' : 'kCGEventOtherMouseUp'
|
||||
|
||||
if (action === 'click') {
|
||||
for (let i = 0; i < (count ?? 1); i++) {
|
||||
await jxa(buildMouseJxa(downType, pos.x, pos.y, btn, i + 1))
|
||||
await jxa(buildMouseJxa(upType, pos.x, pos.y, btn, i + 1))
|
||||
}
|
||||
} else if (action === 'press') {
|
||||
await jxa(buildMouseJxa(downType, pos.x, pos.y, btn))
|
||||
} else {
|
||||
await jxa(buildMouseJxa(upType, pos.x, pos.y, btn))
|
||||
}
|
||||
}
|
||||
|
||||
async function mouseScroll(amount: number, direction: 'vertical' | 'horizontal'): Promise<void> {
|
||||
const script = direction === 'vertical'
|
||||
? `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 1, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
: `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 2, 0, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);`
|
||||
await jxa(script)
|
||||
}
|
||||
|
||||
async function typeText(text: string): Promise<void> {
|
||||
const escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
|
||||
await osascript(`tell application "System Events" to keystroke "${escaped}"`)
|
||||
}
|
||||
|
||||
function getFrontmostAppInfo(): FrontmostAppInfo | null {
|
||||
try {
|
||||
const result = Bun.spawnSync({
|
||||
cmd: ['osascript', '-e', `
|
||||
tell application "System Events"
|
||||
set frontApp to first application process whose frontmost is true
|
||||
set appName to name of frontApp
|
||||
set bundleId to bundle identifier of frontApp
|
||||
return bundleId & "|" & appName
|
||||
end tell
|
||||
`],
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
})
|
||||
const output = new TextDecoder().decode(result.stdout).trim()
|
||||
if (!output || !output.includes('|')) return null
|
||||
const [bundleId, appName] = output.split('|', 2)
|
||||
return { bundleId: bundleId!, appName: appName! }
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Exports ----
|
||||
|
||||
export class ComputerUseInputAPI {
|
||||
declare moveMouse: (
|
||||
x: number,
|
||||
y: number,
|
||||
animated: boolean,
|
||||
) => Promise<void>
|
||||
|
||||
declare key: (
|
||||
key: string,
|
||||
action: 'press' | 'release',
|
||||
) => Promise<void>
|
||||
|
||||
declare moveMouse: (x: number, y: number, animated: boolean) => Promise<void>
|
||||
declare key: (key: string, action: 'press' | 'release') => Promise<void>
|
||||
declare keys: (parts: string[]) => Promise<void>
|
||||
|
||||
declare mouseLocation: () => Promise<{ x: number; y: number }>
|
||||
|
||||
declare mouseButton: (
|
||||
button: 'left' | 'right' | 'middle',
|
||||
action: 'click' | 'press' | 'release',
|
||||
count?: number,
|
||||
) => Promise<void>
|
||||
|
||||
declare mouseScroll: (
|
||||
amount: number,
|
||||
direction: 'vertical' | 'horizontal',
|
||||
) => Promise<void>
|
||||
|
||||
declare mouseButton: (button: 'left' | 'right' | 'middle', action: 'click' | 'press' | 'release', count?: number) => Promise<void>
|
||||
declare mouseScroll: (amount: number, direction: 'vertical' | 'horizontal') => Promise<void>
|
||||
declare typeText: (text: string) => Promise<void>
|
||||
|
||||
declare getFrontmostAppInfo: () => FrontmostAppInfo | null
|
||||
|
||||
declare isSupported: true
|
||||
}
|
||||
|
||||
@ -42,3 +177,7 @@ interface ComputerUseInputUnsupported {
|
||||
}
|
||||
|
||||
export type ComputerUseInput = ComputerUseInputAPI | ComputerUseInputUnsupported
|
||||
|
||||
// Plain object with all methods as own properties — compatible with require()
|
||||
export const isSupported = process.platform === 'darwin'
|
||||
export { moveMouse, key, keys, mouseLocation, mouseButton, mouseScroll, typeText, getFrontmostAppInfo }
|
||||
|
||||
Loading…
Reference in New Issue
Block a user