Running large language models (LLMs) directly in the browser opens up new possibilities for privacy-preserving, client-side AI applications. In this blog post, we explore how to run DeepSeek Janus-Pro-1B, a powerful text-to-image generation model, entirely in the browser using WebGPU and Hugging Face's Transformers.js library.
Thanks to optimizations in Transformers.js and WebGPU acceleration, DeepSeek Janus-Pro-1B, designed for multimodal tasks such as text-to-image generation, is now accessible through browser-based inference.
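If you want to follow along, the only dependency is the Transformers.js package plus a browser that ships WebGPU. Below is a minimal setup sketch; the bundler choice is an assumption, while the package name and the `shader-f16` feature check match what the demo itself uses:

```js
// Install the library first (any modern bundler such as Vite works):
//   npm install @huggingface/transformers
import { MultiModalityCausalLM } from "@huggingface/transformers";

// WebGPU is only exposed in secure contexts (https:// or localhost) and in
// browsers that ship the API, so feature-detect it before loading a model.
if (!navigator.gpu) {
  throw new Error("WebGPU is not available in this browser");
}
const adapter = await navigator.gpu.requestAdapter();
console.log("fp16 shaders supported:", adapter?.features.has("shader-f16"));
```

The fp16 check matters because, as we will see below, the demo picks different quantization settings depending on whether the GPU supports half-precision shaders.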
The following example demonstrates how to load and run DeepSeek Janus-Pro-1B in a Web Worker for non-blocking inference. The complete code is available in the GitHub repository.
```js
import {
  AutoProcessor,
  MultiModalityCausalLM,
  BaseStreamer,
  TextStreamer,
  InterruptableStoppingCriteria,
} from "@huggingface/transformers";

// Define constants
const IMAGE_GENERATION_COMMAND_PREFIX = "/imagine ";
const MAX_NEW_TEXT_TOKENS = 1024;

/**
 * Helper function to perform WebGPU feature detection
 */
let fp16_supported = false;
async function check() {
  try {
    const adapter = await navigator.gpu.requestAdapter();
    if (!adapter) {
      throw new Error("WebGPU is not supported (no adapter found)");
    }
    fp16_supported = adapter.features.has("shader-f16");
    self.postMessage({
      status: "success",
      data: fp16_supported,
    });
  } catch (e) {
    self.postMessage({
      status: "error",
      data: e.toString(),
    });
  }
}

/**
 * This class uses the Singleton pattern to enable lazy-loading of the pipeline
 */
class ImageGenerationPipeline {
  static model_id = "onnx-community/Janus-Pro-1B-ONNX";

  static async getInstance(progress_callback = null) {
    this.processor ??= AutoProcessor.from_pretrained(this.model_id, {
      progress_callback,
    });

    this.model ??= MultiModalityCausalLM.from_pretrained(this.model_id, {
      dtype: fp16_supported
        ? {
            prepare_inputs_embeds: "q4",
            language_model: "q4f16",
            lm_head: "fp16",
            gen_head: "fp16",
            gen_img_embeds: "fp16",
            image_decode: "fp32",
          }
        : {
            prepare_inputs_embeds: "fp32",
            language_model: "q4",
            lm_head: "fp32",
            gen_head: "fp32",
            gen_img_embeds: "fp32",
            image_decode: "fp32",
          },
      device: {
        prepare_inputs_embeds: "wasm", // TODO use "webgpu" when the bug is fixed
        language_model: "webgpu",
        lm_head: "webgpu",
        gen_head: "webgpu",
        gen_img_embeds: "webgpu",
        image_decode: "webgpu",
      },
      progress_callback,
    });

    return Promise.all([this.processor, this.model]);
  }
}

class ProgressStreamer extends BaseStreamer {
  constructor(total, on_progress) {
    super();
    this.total = total;
    this.on_progress = on_progress;

    this.count = null;
    this.start_time = null;
  }

  put(value) {
    if (this.count === null) {
      // Ignore the first batch of tokens (the prompt)
      this.count = 0;
      this.start_time = performance.now();
      return;
    }

    const progress = ++this.count / this.total;

    this.on_progress({
      count: this.count,
      total: this.total,
      progress,
      time: performance.now() - this.start_time,
    });
  }

  end() {
    /* Do nothing */
  }
}

const stopping_criteria = new InterruptableStoppingCriteria();

async function generate(messages) {
  // For this demo, we only respond to the last message
  const message = messages.at(-1);

  // Tell the main thread we have started
  self.postMessage({ status: "start" });

  // Load the pipeline
  const [processor, model] = await ImageGenerationPipeline.getInstance();

  // Determine whether the user wants to generate an image or text
  if (message.content.startsWith(IMAGE_GENERATION_COMMAND_PREFIX)) {
    const text = message.content.replace(IMAGE_GENERATION_COMMAND_PREFIX, "");

    const conversation = [
      {
        role: "<|User|>", // uses title case
        content: text,
      },
    ];
    const inputs = await processor(conversation, {
      chat_template: "text_to_image",
    });

    const callback_function = (output) => {
      self.postMessage({
        status: "image-update",
        ...output,
      });
    };

    const num_image_tokens = processor.num_image_tokens;
    const streamer = new ProgressStreamer(num_image_tokens, callback_function);

    const outputs = await model.generate_images({
      ...inputs,
      min_new_tokens: num_image_tokens,
      max_new_tokens: num_image_tokens,
      do_sample: true,
      streamer,
    });

    const blob = await outputs[0].toBlob();

    // Send the output back to the main thread
    self.postMessage({
      status: "image-update",
      blob,
    });
  } else {
    const inputs = await processor(
      message.image
        ? [
            {
              role: "<|User|>",
              content: "<image_placeholder>\n" + message.content,
              images: [message.image],
            },
          ]
        : [
            {
              role: "<|System|>",
              content:
                "You are a helpful assistant. Answer the user's questions in a concise manner.",
            },
            {
              role: "<|User|>",
              content: message.content,
            },
          ],
    );

    let startTime;
    let numTokens = 0;
    let tps;
    const token_callback_function = () => {
      startTime ??= performance.now();

      if (numTokens++ > 0) {
        tps = (numTokens / (performance.now() - startTime)) * 1000;
      }
    };
    const callback_function = (output) => {
      self.postMessage({
        status: "text-update",
        output,
        tps,
        numTokens,
      });
    };

    const streamer = new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function,
      token_callback_function,
    });

    // Generate the response
    const outputs = await model.generate({
      ...inputs,
      max_new_tokens: MAX_NEW_TEXT_TOKENS,
      do_sample: false,
      streamer,
      stopping_criteria,
    });
  }

  // Tell the main thread we are done
  self.postMessage({
    status: "complete",
  });
}

async function load() {
  self.postMessage({
    status: "loading",
    data: "Loading model...",
  });

  // Load the pipeline and save it for future use.
  const [processor, model] = await ImageGenerationPipeline.getInstance((x) => {
    // We also add a progress callback to the pipeline so that we can
    // track model loading.
    self.postMessage(x);
  });

  self.postMessage({ status: "ready" });
}

// Listen for messages from the main thread
self.addEventListener("message", async (e) => {
  const { type, data } = e.data;

  switch (type) {
    case "check":
      check();
      break;

    case "load":
      load();
      break;

    case "generate":
      stopping_criteria.reset();
      generate(data);
      break;

    case "interrupt":
      stopping_criteria.interrupt();
      break;

    case "reset":
      stopping_criteria.reset();
      break;
  }
});
```
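The worker only handles inference; the page still needs to create it and speak its message protocol. The sketch below is a hypothetical main-thread counterpart: the file name and DOM handling are assumptions, but the message types ("check", "load", "generate") and the statuses it reacts to mirror the worker code above.

```js
// main.js — hypothetical main-thread wiring for the worker above.
const worker = new Worker(new URL("./worker.js", import.meta.url), {
  type: "module",
});

worker.addEventListener("message", (e) => {
  const { status } = e.data;
  switch (status) {
    case "success":
      // WebGPU check passed (e.data.data says whether fp16 is supported),
      // so start downloading the model.
      worker.postMessage({ type: "load" });
      break;
    case "ready":
      // Model loaded; send a prompt. The "/imagine " prefix triggers
      // image generation, anything else is treated as chat.
      worker.postMessage({
        type: "generate",
        data: [{ content: "/imagine a watercolor fox in a forest" }],
      });
      break;
    case "text-update":
      console.log(e.data.output); // streamed text tokens (with tps stats)
      break;
    case "image-update":
      // Progress events carry { count, total, progress }; the final
      // event carries the finished image as a Blob.
      if (e.data.blob) {
        document.querySelector("#result").src =
          URL.createObjectURL(e.data.blob);
      }
      break;
    case "error":
      console.error(e.data.data);
      break;
  }
});

// Kick everything off with WebGPU feature detection.
worker.postMessage({ type: "check" });
```

Keeping the model in a worker means the multi-second download and the token-by-token generation never block the UI thread; the page simply reacts to progress messages as they arrive.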
Check out the live demo here: DeepSeek Janus-Pro-1B browser demo.
Key features of the demo:

- Fully client-side inference: prompts and generated outputs never leave the browser, preserving user privacy.
- WebGPU acceleration, with an fp16 configuration when the adapter reports `shader-f16` support and an fp32 fallback otherwise.
- All loading and generation happen inside a Web Worker, so the UI stays responsive.
- A single model handles both streaming text chat and text-to-image generation (triggered by the `/imagine ` prefix).
Running DeepSeek Janus-Pro-1B in the browser demonstrates the potential of client-side AI. With tools like Transformers.js and WebGPU, complex models can now run efficiently in constrained environments while preserving user privacy.
Next steps:

- Try the live demo in a WebGPU-capable browser.
- Explore the complete example code in the GitHub repository.
- Experiment with adapting the worker to your own prompts and UI.

For developers, this marks an exciting shift toward decentralized, user-centric AI applications. Dive into the sample code and start building!