LLaVa 1.6 Vicuna
In [0]:
Copied!
%pip install mlflow-extensions
%pip install mlflow -U
%pip install sglang==0.2.13 outlines==0.0.44
dbutils.library.restartPython()
%pip install mlflow-extensions
%pip install mlflow -U
%pip install sglang==0.2.13 outlines==0.0.44
dbutils.library.restartPython()
In [0]:
Copied!
from mlflow_extensions.databricks.deploy.ez_deploy import EzDeploy
from mlflow_extensions.databricks.prebuilt import prebuilt
deployer = EzDeploy(
config=prebuilt.vision.sglang.LLAVA_NEXT_LLAMA3_8B_CONFIG,
registered_model_name="main.default.llava_next_llama3_8b_based"
)
deployer.download()
deployer.register()
endpoint_name = "llava_next_llama3_8b_sglang"
deployer.deploy(endpoint_name)
from mlflow_extensions.databricks.deploy.ez_deploy import EzDeploy
from mlflow_extensions.databricks.prebuilt import prebuilt
deployer = EzDeploy(
config=prebuilt.vision.sglang.LLAVA_NEXT_LLAMA3_8B_CONFIG,
registered_model_name="main.default.llava_next_llama3_8b_based"
)
deployer.download()
deployer.register()
endpoint_name = "llava_next_llama3_8b_sglang"
deployer.deploy(endpoint_name)
In [0]:
Copied!
from mlflow_extensions.serving.compat.openai import OpenAI
from mlflow.utils.databricks_utils import get_databricks_host_creds
workspace_host = spark.conf.get("spark.databricks.workspaceUrl")
endpoint_name = "llava_next_llama3_8b_sglang"
endpoint_url = f"https://{workspace_host}/serving-endpoints/{endpoint_name}/invocations"
token = get_databricks_host_creds().token
client = OpenAI(
base_url=endpoint_url,
api_key=token
)
client = OpenAI(base_url=endpoint_url, api_key=token)
# print(client.models.list())
response = client.chat.completions.create(
model="lmms-lab/llama3-llava-next-8b",
max_tokens=256,
messages=[
{"role": "user", "content": [
{"type": "text", "text": "Explain the content of the image? What is in the background?"},
{
"type": "image_url",
"image_url": {
"url": "https://richmedia.ca-richimage.com/ImageDelivery/imageService?profileId=12026540&id=1859027&recipeId=728"
},
},
],
}
],
)
print(response.choices[0].message.content)
from mlflow_extensions.serving.compat.openai import OpenAI
from mlflow.utils.databricks_utils import get_databricks_host_creds
workspace_host = spark.conf.get("spark.databricks.workspaceUrl")
endpoint_name = "llava_next_llama3_8b_sglang"
endpoint_url = f"https://{workspace_host}/serving-endpoints/{endpoint_name}/invocations"
token = get_databricks_host_creds().token
client = OpenAI(
base_url=endpoint_url,
api_key=token
)
client = OpenAI(base_url=endpoint_url, api_key=token)
# print(client.models.list())
response = client.chat.completions.create(
model="lmms-lab/llama3-llava-next-8b",
max_tokens=256,
messages=[
{"role": "user", "content": [
{"type": "text", "text": "Explain the content of the image? What is in the background?"},
{
"type": "image_url",
"image_url": {
"url": "https://richmedia.ca-richimage.com/ImageDelivery/imageService?profileId=12026540&id=1859027&recipeId=728"
},
},
],
}
],
)
print(response.choices[0].message.content)
In [0]:
Copied!
from mlflow_extensions.serving.compat.sglang import RuntimeEndpoint
from mlflow.utils.databricks_utils import get_databricks_host_creds
from sglang import set_default_backend
from sglang.srt.constrained import build_regex_from_object
import sglang as sgl
from pydantic import BaseModel
from typing import Literal
import requests
# the first run takes a bit longer to compile the FSM on the server, all subsequent requests will be fast
# the first call may take 10-30 seconds depending on the complexity of the pydantic object
token = get_databricks_host_creds().token
# connect sglang frontend (this python code) to the backend (model serving endpoint)
set_default_backend(RuntimeEndpoint(endpoint_url, token))
class Fashion(BaseModel):
color: Literal["black", "blue", "gray"]
material: Literal["silk", "denim", "fabric"]
gender: Literal["male", "female"]
fashion = build_regex_from_object(Fashion)
# fix a small regex bug with outlines + sglang for strings
fashion = fashion.replace(r"""([^"\\\x00-\x1f\x7f-\x9f]|\\\\)""", "[\w\d\s]")
print(fashion)
@sgl.function
def image_qa(s, image_file):
s += sgl.user(sgl.image(image_file))
s += "Fill in the details about the item... \n"
s += sgl.gen(
"clothing_details",
max_tokens=128,
temperature=0,
regex=fashion, # Requires pydantic >= 2.0
)
# URL you want to fetch
url = "https://richmedia.ca-richimage.com/ImageDelivery/imageService?profileId=12026540&id=1859027&recipeId=728"
response = requests.get(url)
response.raise_for_status() # Check for request errors
# only need to send the bytes no download, etc
data = image_qa.run(
image_file=response.content,
)
# access by the generation key you asked for
print(data["clothing_details"])
from mlflow_extensions.serving.compat.sglang import RuntimeEndpoint
from mlflow.utils.databricks_utils import get_databricks_host_creds
from sglang import set_default_backend
from sglang.srt.constrained import build_regex_from_object
import sglang as sgl
from pydantic import BaseModel
from typing import Literal
import requests
# the first run takes a bit longer to compile the FSM on the server, all subsequent requests will be fast
# the first call may take 10-30 seconds depending on the complexity of the pydantic object
token = get_databricks_host_creds().token
# connect sglang frontend (this python code) to the backend (model serving endpoint)
set_default_backend(RuntimeEndpoint(endpoint_url, token))
class Fashion(BaseModel):
color: Literal["black", "blue", "gray"]
material: Literal["silk", "denim", "fabric"]
gender: Literal["male", "female"]
fashion = build_regex_from_object(Fashion)
# fix a small regex bug with outlines + sglang for strings
fashion = fashion.replace(r"""([^"\\\x00-\x1f\x7f-\x9f]|\\\\)""", "[\w\d\s]")
print(fashion)
@sgl.function
def image_qa(s, image_file):
s += sgl.user(sgl.image(image_file))
s += "Fill in the details about the item... \n"
s += sgl.gen(
"clothing_details",
max_tokens=128,
temperature=0,
regex=fashion, # Requires pydantic >= 2.0
)
# URL you want to fetch
url = "https://richmedia.ca-richimage.com/ImageDelivery/imageService?profileId=12026540&id=1859027&recipeId=728"
response = requests.get(url)
response.raise_for_status() # Check for request errors
# only need to send the bytes no download, etc
data = image_qa.run(
image_file=response.content,
)
# access by the generation key you asked for
print(data["clothing_details"])
In [0]:
Copied!
from mlflow_extensions.serving.compat.sglang import RuntimeEndpoint
from mlflow.utils.databricks_utils import get_databricks_host_creds
from sglang import set_default_backend
from sglang.srt.constrained import build_regex_from_object
import sglang as sgl
from pydantic import BaseModel
from typing import Literal
import requests
# the first run takes a bit longer to compile the FSM on the server, all subsequent requests will be fast
# the first call may take 10-30 seconds depending on the complexity of the pydantic object
token = get_databricks_host_creds().token
# connect sglang frontend (this python code) to the backend (model serving endpoint)
set_default_backend(RuntimeEndpoint(endpoint_url, token))
class FashionProblems(BaseModel):
description: str
clothing_type: Literal["shirt", "pants", "dress", "skirt", "shoes"]
color: Literal["black", "blue", "gray"]
material: Literal["silk", "denim", "fabric"]
fashion_problems = build_regex_from_object(FashionProblems)
# fix a small regex bug with outlines + sglang for strings
fashion_problems = fashion_problems.replace(r"""([^"\\\x00-\x1f\x7f-\x9f]|\\\\)""", "[\w\d\s]")
print(fashion_problems)
@sgl.function
def image_qa(s, image_file):
s += sgl.user(sgl.image(image_file))
s += "Fill in the problems about the product... \n"
s += sgl.gen(
"clothing_details",
max_tokens=128,
temperature=0,
regex=fashion_problems, # Requires pydantic >= 2.0
)
# URL you want to fetch
url = "https://m.media-amazon.com/images/I/51a94AxNRPL.jpg"
response = requests.get(url)
response.raise_for_status() # Check for request errors
# only need to send the bytes no download, etc
data = image_qa.run(
image_file=response.content,
)
# access by the generation key you asked for
print(data["clothing_details"])
from mlflow_extensions.serving.compat.sglang import RuntimeEndpoint
from mlflow.utils.databricks_utils import get_databricks_host_creds
from sglang import set_default_backend
from sglang.srt.constrained import build_regex_from_object
import sglang as sgl
from pydantic import BaseModel
from typing import Literal
import requests
# the first run takes a bit longer to compile the FSM on the server, all subsequent requests will be fast
# the first call may take 10-30 seconds depending on the complexity of the pydantic object
token = get_databricks_host_creds().token
# connect sglang frontend (this python code) to the backend (model serving endpoint)
set_default_backend(RuntimeEndpoint(endpoint_url, token))
class FashionProblems(BaseModel):
description: str
clothing_type: Literal["shirt", "pants", "dress", "skirt", "shoes"]
color: Literal["black", "blue", "gray"]
material: Literal["silk", "denim", "fabric"]
fashion_problems = build_regex_from_object(FashionProblems)
# fix a small regex bug with outlines + sglang for strings
fashion_problems = fashion_problems.replace(r"""([^"\\\x00-\x1f\x7f-\x9f]|\\\\)""", "[\w\d\s]")
print(fashion_problems)
@sgl.function
def image_qa(s, image_file):
s += sgl.user(sgl.image(image_file))
s += "Fill in the problems about the product... \n"
s += sgl.gen(
"clothing_details",
max_tokens=128,
temperature=0,
regex=fashion_problems, # Requires pydantic >= 2.0
)
# URL you want to fetch
url = "https://m.media-amazon.com/images/I/51a94AxNRPL.jpg"
response = requests.get(url)
response.raise_for_status() # Check for request errors
# only need to send the bytes no download, etc
data = image_qa.run(
image_file=response.content,
)
# access by the generation key you asked for
print(data["clothing_details"])
In [0]:
Copied!
from mlflow_extensions.serving.compat.sglang import RuntimeEndpoint
from mlflow.utils.databricks_utils import get_databricks_host_creds
from sglang import set_default_backend
from sglang.srt.constrained import build_regex_from_object
import sglang as sgl
from pydantic import BaseModel
from typing import Literal, List
import requests
# the first run takes a bit longer to compile the FSM on the server, all subsequent requests will be fast
# the first call may take 10-30 seconds depending on the complexity of the pydantic object
token = get_databricks_host_creds().token
# connect sglang frontend (this python code) to the backend (model serving endpoint)
set_default_backend(RuntimeEndpoint(endpoint_url, token))
class StockoutProblems(BaseModel):
description: str
stockout: bool
types_of_products: List[Literal["food", "clothing", "appliances", "durables"]]
stockout_problems = build_regex_from_object(StockoutProblems)
# fix a small regex bug with outlines + sglang for strings
stockout_problems = stockout_problems.replace(r"""([^"\\\x00-\x1f\x7f-\x9f]|\\\\)""", "[\w\d\s]")
print(stockout_problems)
@sgl.function
def image_qa(s, image_file):
s += sgl.user(sgl.image(image_file))
s += "Is there an out of stock situation? What type of product seems to be out of stock? (food/clothing/appliances/durables) \n"
s += sgl.gen(
"stockout_details",
max_tokens=128,
temperature=0,
regex=stockout_problems, # Requires pydantic >= 2.0
)
# URL you want to fetch
url = "https://assets.eposnow.com/public/content-images/pexels-roy-broo-empty-shelves-grocery-items.jpg"
response = requests.get(url)
response.raise_for_status() # Check for request errors
# only need to send the bytes no download, etc
data = image_qa.run(
image_file=response.content,
)
# access by the generation key you asked for
print(data["stockout_details"])
from mlflow_extensions.serving.compat.sglang import RuntimeEndpoint
from mlflow.utils.databricks_utils import get_databricks_host_creds
from sglang import set_default_backend
from sglang.srt.constrained import build_regex_from_object
import sglang as sgl
from pydantic import BaseModel
from typing import Literal, List
import requests
# the first run takes a bit longer to compile the FSM on the server, all subsequent requests will be fast
# the first call may take 10-30 seconds depending on the complexity of the pydantic object
token = get_databricks_host_creds().token
# connect sglang frontend (this python code) to the backend (model serving endpoint)
set_default_backend(RuntimeEndpoint(endpoint_url, token))
class StockoutProblems(BaseModel):
description: str
stockout: bool
types_of_products: List[Literal["food", "clothing", "appliances", "durables"]]
stockout_problems = build_regex_from_object(StockoutProblems)
# fix a small regex bug with outlines + sglang for strings
stockout_problems = stockout_problems.replace(r"""([^"\\\x00-\x1f\x7f-\x9f]|\\\\)""", "[\w\d\s]")
print(stockout_problems)
@sgl.function
def image_qa(s, image_file):
s += sgl.user(sgl.image(image_file))
s += "Is there an out of stock situation? What type of product seems to be out of stock? (food/clothing/appliances/durables) \n"
s += sgl.gen(
"stockout_details",
max_tokens=128,
temperature=0,
regex=stockout_problems, # Requires pydantic >= 2.0
)
# URL you want to fetch
url = "https://assets.eposnow.com/public/content-images/pexels-roy-broo-empty-shelves-grocery-items.jpg"
response = requests.get(url)
response.raise_for_status() # Check for request errors
# only need to send the bytes no download, etc
data = image_qa.run(
image_file=response.content,
)
# access by the generation key you asked for
print(data["stockout_details"])