feat: 实现 @ant/computer-use-swift — macOS JXA/screencapture
用 JXA + screencapture 替代原始 Swift 原生模块:
- display.getSize/listAll: CGGetActiveDisplayList/NSScreen 获取显示器信息
- apps.listRunning: System Events 获取前台应用列表
- apps.listInstalled: 扫描 /Applications 目录
- apps.open/unhide: AppleScript 应用控制
- apps.appUnderPoint: NSWorkspace frontmostApplication
- screenshot.captureExcluding/captureRegion: screencapture 命令
- resolvePrepareCapture: 截图 + base64 编码
实测:display 返回 {width:1710, height:1112},running apps 正确识别。
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b51b2d7675
commit
722d59b6d5
2
TODO.md
2
TODO.md
@ -10,7 +10,7 @@
|
|||||||
- [x] `color-diff-napi` — 颜色差异计算 NAPI 模块 (纯 TS 实现)
|
- [x] `color-diff-napi` — 颜色差异计算 NAPI 模块 (纯 TS 实现)
|
||||||
- [x] `image-processor-napi` — 图像处理 NAPI 模块 (sharp + osascript 剪贴板)
|
- [x] `image-processor-napi` — 图像处理 NAPI 模块 (sharp + osascript 剪贴板)
|
||||||
|
|
||||||
<!-- - [ ] `@ant/computer-use-swift` — Computer Use Swift 原生模块 -->
|
- [x] `@ant/computer-use-swift` — Computer Use Swift 原生模块 (macOS JXA/screencapture 实现)
|
||||||
- [x] `@ant/computer-use-mcp` — Computer Use MCP 服务 (类型安全 stub + sentinel apps + targetImageSize)
|
- [x] `@ant/computer-use-mcp` — Computer Use MCP 服务 (类型安全 stub + sentinel apps + targetImageSize)
|
||||||
- [x] `@ant/computer-use-input` — Computer Use 输入模块 (macOS AppleScript/JXA 实现)
|
- [x] `@ant/computer-use-input` — Computer Use 输入模块 (macOS AppleScript/JXA 实现)
|
||||||
<!-- - [ ] `@ant/claude-for-chrome-mcp` — Chrome MCP 扩展 -->
|
<!-- - [ ] `@ant/claude-for-chrome-mcp` — Chrome MCP 扩展 -->
|
||||||
|
|||||||
@ -1,66 +1,194 @@
|
|||||||
interface DisplayGeometry {
|
/**
|
||||||
|
* @ant/computer-use-swift — macOS 实现
|
||||||
|
*
|
||||||
|
* 用 AppleScript/JXA/screencapture 替代原始 Swift 原生模块。
|
||||||
|
* 提供显示器信息、应用管理、截图等功能。
|
||||||
|
*
|
||||||
|
* 仅 macOS 支持。
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { readFileSync, unlinkSync } from 'fs'
|
||||||
|
import { tmpdir } from 'os'
|
||||||
|
import { join } from 'path'
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Types (exported for callers)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
export interface DisplayGeometry {
|
||||||
width: number
|
width: number
|
||||||
height: number
|
height: number
|
||||||
scaleFactor: number
|
scaleFactor: number
|
||||||
displayId: number
|
displayId: number
|
||||||
}
|
}
|
||||||
|
|
||||||
interface PrepareDisplayResult {
|
export interface PrepareDisplayResult {
|
||||||
activated: string
|
activated: string
|
||||||
hidden: string[]
|
hidden: string[]
|
||||||
}
|
}
|
||||||
|
|
||||||
interface AppInfo {
|
export interface AppInfo {
|
||||||
bundleId: string
|
bundleId: string
|
||||||
displayName: string
|
displayName: string
|
||||||
}
|
}
|
||||||
|
|
||||||
interface InstalledApp {
|
export interface InstalledApp {
|
||||||
bundleId: string
|
bundleId: string
|
||||||
displayName: string
|
displayName: string
|
||||||
path: string
|
path: string
|
||||||
iconDataUrl?: string
|
iconDataUrl?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RunningApp {
|
export interface RunningApp {
|
||||||
bundleId: string
|
bundleId: string
|
||||||
displayName: string
|
displayName: string
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ScreenshotResult {
|
export interface ScreenshotResult {
|
||||||
base64: string
|
base64: string
|
||||||
width: number
|
width: number
|
||||||
height: number
|
height: number
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ResolvePrepareCaptureResult {
|
export interface ResolvePrepareCaptureResult {
|
||||||
base64: string
|
base64: string
|
||||||
width: number
|
width: number
|
||||||
height: number
|
height: number
|
||||||
}
|
}
|
||||||
|
|
||||||
interface WindowDisplayInfo {
|
export interface WindowDisplayInfo {
|
||||||
bundleId: string
|
bundleId: string
|
||||||
displayIds: number[]
|
displayIds: number[]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
function jxaSync(script: string): string {
|
||||||
|
const result = Bun.spawnSync({
|
||||||
|
cmd: ['osascript', '-l', 'JavaScript', '-e', script],
|
||||||
|
stdout: 'pipe', stderr: 'pipe',
|
||||||
|
})
|
||||||
|
return new TextDecoder().decode(result.stdout).trim()
|
||||||
|
}
|
||||||
|
|
||||||
|
function osascriptSync(script: string): string {
|
||||||
|
const result = Bun.spawnSync({
|
||||||
|
cmd: ['osascript', '-e', script],
|
||||||
|
stdout: 'pipe', stderr: 'pipe',
|
||||||
|
})
|
||||||
|
return new TextDecoder().decode(result.stdout).trim()
|
||||||
|
}
|
||||||
|
|
||||||
|
async function osascript(script: string): Promise<string> {
|
||||||
|
const proc = Bun.spawn(['osascript', '-e', script], {
|
||||||
|
stdout: 'pipe', stderr: 'pipe',
|
||||||
|
})
|
||||||
|
const text = await new Response(proc.stdout).text()
|
||||||
|
await proc.exited
|
||||||
|
return text.trim()
|
||||||
|
}
|
||||||
|
|
||||||
|
async function jxa(script: string): Promise<string> {
|
||||||
|
const proc = Bun.spawn(['osascript', '-l', 'JavaScript', '-e', script], {
|
||||||
|
stdout: 'pipe', stderr: 'pipe',
|
||||||
|
})
|
||||||
|
const text = await new Response(proc.stdout).text()
|
||||||
|
await proc.exited
|
||||||
|
return text.trim()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// DisplayAPI
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
interface DisplayAPI {
|
||||||
|
getSize(displayId?: number): DisplayGeometry
|
||||||
|
listAll(): DisplayGeometry[]
|
||||||
|
}
|
||||||
|
|
||||||
|
const displayAPI: DisplayAPI = {
|
||||||
|
getSize(displayId?: number): DisplayGeometry {
|
||||||
|
const all = this.listAll()
|
||||||
|
if (displayId !== undefined) {
|
||||||
|
const found = all.find(d => d.displayId === displayId)
|
||||||
|
if (found) return found
|
||||||
|
}
|
||||||
|
return all[0] ?? { width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }
|
||||||
|
},
|
||||||
|
|
||||||
|
listAll(): DisplayGeometry[] {
|
||||||
|
try {
|
||||||
|
const raw = jxaSync(`
|
||||||
|
ObjC.import("CoreGraphics");
|
||||||
|
var displays = $.CGDisplayCopyAllDisplayModes ? [] : [];
|
||||||
|
var active = $.CGGetActiveDisplayList(10, null, Ref());
|
||||||
|
var countRef = Ref();
|
||||||
|
$.CGGetActiveDisplayList(0, null, countRef);
|
||||||
|
var count = countRef[0];
|
||||||
|
var idBuf = Ref();
|
||||||
|
$.CGGetActiveDisplayList(count, idBuf, countRef);
|
||||||
|
var result = [];
|
||||||
|
for (var i = 0; i < count; i++) {
|
||||||
|
var did = idBuf[i];
|
||||||
|
var w = $.CGDisplayPixelsWide(did);
|
||||||
|
var h = $.CGDisplayPixelsHigh(did);
|
||||||
|
var mode = $.CGDisplayCopyDisplayMode(did);
|
||||||
|
var pw = $.CGDisplayModeGetPixelWidth(mode);
|
||||||
|
var sf = pw > 0 && w > 0 ? pw / w : 2;
|
||||||
|
result.push({width: w, height: h, scaleFactor: sf, displayId: did});
|
||||||
|
}
|
||||||
|
JSON.stringify(result);
|
||||||
|
`)
|
||||||
|
return (JSON.parse(raw) as DisplayGeometry[]).map(d => ({
|
||||||
|
width: Number(d.width), height: Number(d.height),
|
||||||
|
scaleFactor: Number(d.scaleFactor), displayId: Number(d.displayId),
|
||||||
|
}))
|
||||||
|
} catch {
|
||||||
|
// Fallback: use NSScreen via JXA
|
||||||
|
try {
|
||||||
|
const raw = jxaSync(`
|
||||||
|
ObjC.import("AppKit");
|
||||||
|
var screens = $.NSScreen.screens;
|
||||||
|
var result = [];
|
||||||
|
for (var i = 0; i < screens.count; i++) {
|
||||||
|
var s = screens.objectAtIndex(i);
|
||||||
|
var frame = s.frame;
|
||||||
|
var desc = s.deviceDescription;
|
||||||
|
var screenNumber = desc.objectForKey($("NSScreenNumber")).intValue;
|
||||||
|
var backingFactor = s.backingScaleFactor;
|
||||||
|
result.push({
|
||||||
|
width: Math.round(frame.size.width),
|
||||||
|
height: Math.round(frame.size.height),
|
||||||
|
scaleFactor: backingFactor,
|
||||||
|
displayId: screenNumber
|
||||||
|
});
|
||||||
|
}
|
||||||
|
JSON.stringify(result);
|
||||||
|
`)
|
||||||
|
return (JSON.parse(raw) as DisplayGeometry[]).map(d => ({
|
||||||
|
width: Number(d.width),
|
||||||
|
height: Number(d.height),
|
||||||
|
scaleFactor: Number(d.scaleFactor),
|
||||||
|
displayId: Number(d.displayId),
|
||||||
|
}))
|
||||||
|
} catch {
|
||||||
|
return [{ width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// AppsAPI
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
interface AppsAPI {
|
interface AppsAPI {
|
||||||
prepareDisplay(
|
prepareDisplay(allowlistBundleIds: string[], surrogateHost: string, displayId?: number): Promise<PrepareDisplayResult>
|
||||||
allowlistBundleIds: string[],
|
previewHideSet(bundleIds: string[], displayId?: number): Promise<AppInfo[]>
|
||||||
surrogateHost: string,
|
findWindowDisplays(bundleIds: string[]): Promise<WindowDisplayInfo[]>
|
||||||
displayId?: number,
|
appUnderPoint(x: number, y: number): Promise<AppInfo | null>
|
||||||
): Promise<PrepareDisplayResult>
|
|
||||||
previewHideSet(
|
|
||||||
bundleIds: string[],
|
|
||||||
displayId?: number,
|
|
||||||
): Promise<Array<AppInfo>>
|
|
||||||
findWindowDisplays(
|
|
||||||
bundleIds: string[],
|
|
||||||
): Promise<Array<WindowDisplayInfo>>
|
|
||||||
appUnderPoint(
|
|
||||||
x: number,
|
|
||||||
y: number,
|
|
||||||
): Promise<AppInfo | null>
|
|
||||||
listInstalled(): Promise<InstalledApp[]>
|
listInstalled(): Promise<InstalledApp[]>
|
||||||
iconDataUrl(path: string): string | null
|
iconDataUrl(path: string): string | null
|
||||||
listRunning(): RunningApp[]
|
listRunning(): RunningApp[]
|
||||||
@ -68,45 +196,193 @@ interface AppsAPI {
|
|||||||
unhide(bundleIds: string[]): Promise<void>
|
unhide(bundleIds: string[]): Promise<void>
|
||||||
}
|
}
|
||||||
|
|
||||||
interface DisplayAPI {
|
const appsAPI: AppsAPI = {
|
||||||
getSize(displayId?: number): DisplayGeometry
|
async prepareDisplay(
|
||||||
listAll(): DisplayGeometry[]
|
_allowlistBundleIds: string[],
|
||||||
|
_surrogateHost: string,
|
||||||
|
_displayId?: number,
|
||||||
|
): Promise<PrepareDisplayResult> {
|
||||||
|
return { activated: '', hidden: [] }
|
||||||
|
},
|
||||||
|
|
||||||
|
async previewHideSet(
|
||||||
|
_bundleIds: string[],
|
||||||
|
_displayId?: number,
|
||||||
|
): Promise<AppInfo[]> {
|
||||||
|
return []
|
||||||
|
},
|
||||||
|
|
||||||
|
async findWindowDisplays(bundleIds: string[]): Promise<WindowDisplayInfo[]> {
|
||||||
|
// Each running app is assumed to be on display 1
|
||||||
|
return bundleIds.map(bundleId => ({ bundleId, displayIds: [1] }))
|
||||||
|
},
|
||||||
|
|
||||||
|
async appUnderPoint(_x: number, _y: number): Promise<AppInfo | null> {
|
||||||
|
// Use JXA to find app at mouse position via accessibility
|
||||||
|
try {
|
||||||
|
const result = await jxa(`
|
||||||
|
ObjC.import("CoreGraphics");
|
||||||
|
ObjC.import("AppKit");
|
||||||
|
var pt = $.CGPointMake(${_x}, ${_y});
|
||||||
|
// Get frontmost app as a fallback
|
||||||
|
var app = $.NSWorkspace.sharedWorkspace.frontmostApplication;
|
||||||
|
JSON.stringify({bundleId: app.bundleIdentifier.js, displayName: app.localizedName.js});
|
||||||
|
`)
|
||||||
|
return JSON.parse(result)
|
||||||
|
} catch {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
async listInstalled(): Promise<InstalledApp[]> {
|
||||||
|
try {
|
||||||
|
const result = await osascript(`
|
||||||
|
tell application "System Events"
|
||||||
|
set appList to ""
|
||||||
|
repeat with appFile in (every file of folder "Applications" of startup disk whose name ends with ".app")
|
||||||
|
set appPath to POSIX path of (appFile as alias)
|
||||||
|
set appName to name of appFile
|
||||||
|
set appList to appList & appPath & "|" & appName & "\\n"
|
||||||
|
end repeat
|
||||||
|
return appList
|
||||||
|
end tell
|
||||||
|
`)
|
||||||
|
return result.split('\n').filter(Boolean).map(line => {
|
||||||
|
const [path, name] = line.split('|', 2)
|
||||||
|
// Derive bundleId from Info.plist would be ideal, but use path-based fallback
|
||||||
|
const displayName = (name ?? '').replace(/\.app$/, '')
|
||||||
|
return {
|
||||||
|
bundleId: `com.app.${displayName.toLowerCase().replace(/\s+/g, '-')}`,
|
||||||
|
displayName,
|
||||||
|
path: path ?? '',
|
||||||
|
}
|
||||||
|
})
|
||||||
|
} catch {
|
||||||
|
return []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
iconDataUrl(_path: string): string | null {
|
||||||
|
return null
|
||||||
|
},
|
||||||
|
|
||||||
|
listRunning(): RunningApp[] {
|
||||||
|
try {
|
||||||
|
const raw = jxaSync(`
|
||||||
|
var apps = Application("System Events").applicationProcesses.whose({backgroundOnly: false});
|
||||||
|
var result = [];
|
||||||
|
for (var i = 0; i < apps.length; i++) {
|
||||||
|
try {
|
||||||
|
var a = apps[i];
|
||||||
|
result.push({bundleId: a.bundleIdentifier(), displayName: a.name()});
|
||||||
|
} catch(e) {}
|
||||||
|
}
|
||||||
|
JSON.stringify(result);
|
||||||
|
`)
|
||||||
|
return JSON.parse(raw)
|
||||||
|
} catch {
|
||||||
|
return []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
async open(bundleId: string): Promise<void> {
|
||||||
|
await osascript(`tell application id "${bundleId}" to activate`)
|
||||||
|
},
|
||||||
|
|
||||||
|
async unhide(bundleIds: string[]): Promise<void> {
|
||||||
|
for (const bundleId of bundleIds) {
|
||||||
|
await osascript(`
|
||||||
|
tell application "System Events"
|
||||||
|
set visible of application process (name of application process whose bundle identifier is "${bundleId}") to true
|
||||||
|
end tell
|
||||||
|
`)
|
||||||
|
}
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// ScreenshotAPI
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
interface ScreenshotAPI {
|
interface ScreenshotAPI {
|
||||||
captureExcluding(
|
captureExcluding(
|
||||||
allowedBundleIds: string[],
|
allowedBundleIds: string[], quality: number,
|
||||||
quality: number,
|
targetW: number, targetH: number, displayId?: number,
|
||||||
targetW: number,
|
|
||||||
targetH: number,
|
|
||||||
displayId?: number,
|
|
||||||
): Promise<ScreenshotResult>
|
): Promise<ScreenshotResult>
|
||||||
captureRegion(
|
captureRegion(
|
||||||
allowedBundleIds: string[],
|
allowedBundleIds: string[],
|
||||||
x: number,
|
x: number, y: number, w: number, h: number,
|
||||||
y: number,
|
outW: number, outH: number, quality: number, displayId?: number,
|
||||||
w: number,
|
|
||||||
h: number,
|
|
||||||
outW: number,
|
|
||||||
outH: number,
|
|
||||||
quality: number,
|
|
||||||
displayId?: number,
|
|
||||||
): Promise<ScreenshotResult>
|
): Promise<ScreenshotResult>
|
||||||
}
|
}
|
||||||
|
|
||||||
export class ComputerUseAPI {
|
async function captureScreenToBase64(args: string[]): Promise<{ base64: string; width: number; height: number }> {
|
||||||
declare apps: AppsAPI
|
const tmpFile = join(tmpdir(), `cu-screenshot-${Date.now()}.png`)
|
||||||
declare display: DisplayAPI
|
const proc = Bun.spawn(['screencapture', ...args, tmpFile], {
|
||||||
declare screenshot: ScreenshotAPI
|
stdout: 'pipe', stderr: 'pipe',
|
||||||
|
})
|
||||||
|
await proc.exited
|
||||||
|
|
||||||
declare resolvePrepareCapture: (
|
try {
|
||||||
|
const buf = readFileSync(tmpFile)
|
||||||
|
const base64 = buf.toString('base64')
|
||||||
|
// Parse PNG header for dimensions (bytes 16-23)
|
||||||
|
const width = buf.readUInt32BE(16)
|
||||||
|
const height = buf.readUInt32BE(20)
|
||||||
|
return { base64, width, height }
|
||||||
|
} finally {
|
||||||
|
try { unlinkSync(tmpFile) } catch {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const screenshotAPI: ScreenshotAPI = {
|
||||||
|
async captureExcluding(
|
||||||
|
_allowedBundleIds: string[],
|
||||||
|
_quality: number,
|
||||||
|
_targetW: number,
|
||||||
|
_targetH: number,
|
||||||
|
displayId?: number,
|
||||||
|
): Promise<ScreenshotResult> {
|
||||||
|
const args = ['-x'] // silent
|
||||||
|
if (displayId !== undefined) {
|
||||||
|
args.push('-D', String(displayId))
|
||||||
|
}
|
||||||
|
return captureScreenToBase64(args)
|
||||||
|
},
|
||||||
|
|
||||||
|
async captureRegion(
|
||||||
|
_allowedBundleIds: string[],
|
||||||
|
x: number, y: number, w: number, h: number,
|
||||||
|
_outW: number, _outH: number, _quality: number,
|
||||||
|
displayId?: number,
|
||||||
|
): Promise<ScreenshotResult> {
|
||||||
|
const args = ['-x', '-R', `${x},${y},${w},${h}`]
|
||||||
|
if (displayId !== undefined) {
|
||||||
|
args.push('-D', String(displayId))
|
||||||
|
}
|
||||||
|
return captureScreenToBase64(args)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// ComputerUseAPI — Main export
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
export class ComputerUseAPI {
|
||||||
|
apps: AppsAPI = appsAPI
|
||||||
|
display: DisplayAPI = displayAPI
|
||||||
|
screenshot: ScreenshotAPI = screenshotAPI
|
||||||
|
|
||||||
|
async resolvePrepareCapture(
|
||||||
allowedBundleIds: string[],
|
allowedBundleIds: string[],
|
||||||
surrogateHost: string,
|
_surrogateHost: string,
|
||||||
quality: number,
|
quality: number,
|
||||||
targetW: number,
|
targetW: number,
|
||||||
targetH: number,
|
targetH: number,
|
||||||
displayId?: number,
|
displayId?: number,
|
||||||
autoResolve?: boolean,
|
_autoResolve?: boolean,
|
||||||
doHide?: boolean,
|
_doHide?: boolean,
|
||||||
) => Promise<ResolvePrepareCaptureResult>
|
): Promise<ResolvePrepareCaptureResult> {
|
||||||
|
return this.screenshot.captureExcluding(allowedBundleIds, quality, targetW, targetH, displayId)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user