Running DeepSeek Janus-Pro-in the Browser: A Step-by-Step Guide-JS Tutorial-php.cn

Running DeepSeek Janus-Pro-in the Browser: A Step-by-Step Guide

Running a large language model (LLM) directly in the browser brings new possibilities to the client AI application that protects privacy. In this blog post, we will explore how to use WebGPU and Hugging Face's transformers.js library in the browser to fully run a powerful text to image generation model

Deepseek Janus-PRO-1B .

Why choose a browser -based reasoning?

Privacy

: The data will never leave the user's device.

Cost benefits : No server infrastructure is required.
: It can run on any device with a modern browser and WebGPU.

Transformers.js

and WebGPU accelerate optimization, deeds dedicated to the design of multi-modal tasks such as text-to-image generation Interview with reasoning.

Key tools and libraries

Transformers.js : JavaScript version of the Transformers library of Hugging Face, optimized for the execution of the browser.

WebGPU : Modern API for GPU accelerated in the browser, it replaces WebGL with the improved ML workload performance.

Onnx Runtime

Demonstration code exercise
The following example demonstrates how to load and run Deepseek Janus-PRO-1B in the Web Worker for non-blocking reasoning. The complete code can be found in the GitHub repository.
Run the demonstration

Deepseek Janus-PRO-1B browser demonstration

. The key features of the demonstration

import {
  AutoProcessor,
  MultiModalityCausalLM,
  BaseStreamer,
  TextStreamer,
  InterruptableStoppingCriteria,
} from "@huggingface/transformers";

// 定义常量
const IMAGE_GENERATION_COMMAND_PREFIX = "/imagine ";
const MAX_NEW_TEXT_TOKENS = 1024;

/**
 * 用于执行 WebGPU 功能检测的辅助函数
 */
let fp16_supported = false;
async function check() {
  try {
    const adapter = await navigator.gpu.requestAdapter();
    if (!adapter) {
      throw new Error("WebGPU 不受支持（未找到适配器）");
    }
    fp16_supported = adapter.features.has("shader-f16");
    self.postMessage({
      status: "success",
      data: fp16_supported,
    });
  } catch (e) {
    self.postMessage({
      status: "error",
      data: e.toString(),
    });
  }
}

/**
 * 此类使用单例模式来启用管道延迟加载
 */
class ImageGenerationPipeline {
  static model_id = "onnx-community/Janus-Pro-1B-ONNX";

  static async getInstance(progress_callback = null) {
    this.processor ??= AutoProcessor.from_pretrained(this.model_id, {
      progress_callback,
    });

    this.model ??= MultiModalityCausalLM.from_pretrained(this.model_id, {
      dtype: fp16_supported
        ? {
            prepare_inputs_embeds: "q4",
            language_model: "q4f16",
            lm_head: "fp16",
            gen_head: "fp16",
            gen_img_embeds: "fp16",
            image_decode: "fp32",
          }
        : {
            prepare_inputs_embeds: "fp32",
            language_model: "q4",
            lm_head: "fp32",
            lm_head: "fp32",
            gen_head: "fp32",
            gen_img_embeds: "fp32",
            image_decode: "fp32",
          },
      device: {
        prepare_inputs_embeds: "wasm", // TODO 当错误修复后使用“webgpu”
        language_model: "webgpu",
        lm_head: "webgpu",
        gen_head: "webgpu",
        gen_img_embeds: "webgpu",
        image_decode: "webgpu",
      },
      progress_callback,
    });

    return Promise.all([this.processor, this.model]);
  }
}

class ProgressStreamer extends BaseStreamer {
  constructor(total, on_progress) {
    super();
    this.total = total;
    this.on_progress = on_progress;

    this.count = null;
    this.start_time = null;
  }

  put(value) {
    if (this.count === null) {
      // 忽略第一批标记（提示）
      this.count = 0;
      this.start_time = performance.now();
      return;
    }

    const progress = ++this.count / this.total;

    this.on_progress({
      count: this.count,
      total: this.total,
      progress,
      time: performance.now() - this.start_time,
    });
  }

  end() {
    /* 什么也不做 */
  }
}

const stopping_criteria = new InterruptableStoppingCriteria();

async function generate(messages) {
  // 对于此演示，我们只响应最后一条消息
  const message = messages.at(-1);

  // 告诉主线程我们已开始
  self.postMessage({ status: "start" });

  // 加载管道
  const [processor, model] = await ImageGenerationPipeline.getInstance();

  // 确定用户是否要生成图像或文本
  if (message.content.startsWith(IMAGE_GENERATION_COMMAND_PREFIX)) {
    const text = message.content.replace(IMAGE_GENERATION_COMMAND_PREFIX, "");

    const conversation = [
      {
        role: "", // 使用标题大小写
        content: text,
      },
    ];
    const inputs = await processor(conversation, {
      chat_template: "text_to_image",
    });

    const callback_function = (output) => {
      self.postMessage({
        status: "image-update",
        ...output,
      });
    };

    const num_image_tokens = processor.num_image_tokens;
    const streamer = new ProgressStreamer(num_image_tokens, callback_function);

    const outputs = await model.generate_images({
      ...inputs,
      min_new_tokens: num_image_tokens,
      max_new_tokens: num_image_tokens,
      do_sample: true,
      streamer,
    });

    const blob = await outputs[0].toBlob();

    // 将输出发送回主线程
    self.postMessage({
      status: "image-update",
      blob,
    });
  } else {
    const inputs = await processor(
      message.image
        ? [
            {
              role: "",
              content: "<image_placeholder>\n" + message.content,
              images: [message.image],
            },
          ]
        : [
            {
              role: "",
              content:
                "您是一位乐于助人的助手。以简洁的方式回答用户的问题。",
            },
            {
              role: "",
              content: message.content,
            },
          ],
    );

    let startTime;
    let numTokens = 0;
    let tps;
    const token_callback_function = () => {
      startTime ??= performance.now();

      if (numTokens++ > 0) {
        tps = (numTokens / (performance.now() - startTime)) * 1000;
      }
    };
    const callback_function = (output) => {
      self.postMessage({
        status: "text-update",
        output,
        tps,
        numTokens,
      });
    };

    const streamer = new TextStreamer(processor.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function,
      token_callback_function,
    });

    // 生成响应
    const outputs = await model.generate({
      ...inputs,
      max_new_tokens: MAX_NEW_TEXT_TOKENS,
      do_sample: false,
      streamer,
      stopping_criteria,
    });
  }

  // 告诉主线程我们已完成
  self.postMessage({
    status: "complete",
  });
}

async function load() {
  self.postMessage({
    status: "loading",
    data: "正在加载模型...",
  });

  // 加载管道并将其保存以备将来使用。
  const [processor, model] = await ImageGenerationPipeline.getInstance((x) => {
    // 我们还向管道添加进度回调，以便我们可以
    // 跟踪模型加载。
    self.postMessage(x);
  });

  self.postMessage({ status: "ready" });
}

// 侦听来自主线程的消息
self.addEventListener("message", async (e) => {
  const { type, data } = e.data;

  switch (type) {
    case "check":
      check();
      break;

    case "load":
      load();
      break;

    case "generate":
      stopping_criteria.reset();
      generate(data);
      break;

    case "interrupt":
      stopping_criteria.interrupt();
      break;

    case "reset":
      stopping_criteria.reset();
      break;
  }
});

Copy after login

The real -time progress of the model loading and reasoning is updated.

WebGPU is accelerated (required Chrome 113 or Edge 113).

Complete client execution -the data will not be sent to the external server.

Challenge and optimization

Model quantification

: Model quantification to 8 digits to reduce its size and increase loading speed.

Memory management
:: WebGPU is still in the test stage, but it is essential for performance.

Conclusion

Running Deepseek Janus-PRO-1B in the browser shows the potential of the client AI. With tools such as Transformers.js and WebGPUs, complex models can now run efficiently in the limited environment, while protecting user privacy.

Follow -up steps :
- Try different prompts and model configurations.
- Exploring the fine -tuning model for mission for specific fields.
- Monitor the adoption of WebGPU to ensure wider compatibility.
For developers, this marks the exciting transformation of the descent and user -centric AI applications. In -depth research on the example code and start construction! ?

This Revied Output Maintains The Original Meaning While USING DIFFFERENG and Sentence Structures. He code is also included, though it's a very long code snippet and might benefit from being brooken into Smaller, More manageable chunks in a real application.

The above is the detailed content of Running DeepSeek Janus-Pro-in the Browser: A Step-by-Step Guide. For more information, please follow other related articles on the PHP Chinese website!