|
1 | 1 | package main |
2 | 2 |
|
3 | 3 | import ( |
4 | | -"bytes" |
5 | | -"encoding/base64" |
6 | | -"encoding/json" |
| 4 | +"context" |
7 | 5 | "fmt" |
8 | | -"io" |
9 | 6 | "log" |
10 | | -"net/http" |
11 | 7 | "os" |
12 | 8 | "path/filepath" |
13 | 9 | "strings" |
14 | 10 |
|
15 | 11 | "github.com/alexflint/go-arg" |
| 12 | +"github.com/ollama/ollama/api" |
16 | 13 | ) |
17 | 14 |
|
18 | | -constOLLAMA_HOST="http://192.168.1.230:11434" |
19 | | - |
20 | | -typeOllamaMessagestruct { |
21 | | -Rolestring |
22 | | -Contentstring |
23 | | -Images []string |
24 | | -} |
25 | | - |
26 | | -typeOllamaRequeststruct { |
27 | | -Modelstring |
28 | | -Streambool |
29 | | -Messages []OllamaMessage |
30 | | -Formatstring |
31 | | -Options []string |
32 | | -} |
33 | | - |
34 | | -typeOllamaResponsestruct { |
35 | | -MessageOllamaMessage`json:"message"` |
36 | | -} |
37 | | - |
38 | | -funcGenerateWithImage(prompt,imagePathstring) (string,error) { |
| 15 | +funcGenerateWithImage(ol*api.Client,model,prompt,imagePathstring) (string,error) { |
39 | 16 | // First, convert the image to base64 |
40 | 17 | imageData,err:=os.ReadFile(imagePath) |
41 | 18 | iferr!=nil { |
42 | 19 | return"",fmt.Errorf("failed to read image: %w",err) |
43 | 20 | } |
44 | 21 |
|
45 | | -base64Image:=base64.StdEncoding.EncodeToString(imageData) |
46 | | - |
47 | | -msg:=OllamaMessage{ |
| 22 | +msg:= api.Message{ |
48 | 23 | Role:"user", |
49 | 24 | Content:prompt, |
50 | | -Images: []string{base64Image}, |
51 | | -} |
52 | | - |
53 | | -// Prepare the request body |
54 | | -reqBody:=OllamaRequest{ |
55 | | -Model:"x/llama3.2-vision", |
56 | | -Messages: []OllamaMessage{msg}, |
57 | | -Stream:false, |
58 | | -} |
59 | | - |
60 | | -// Marshal the request body to JSON |
61 | | -jsonData,err:=json.Marshal(reqBody) |
62 | | -iferr!=nil { |
63 | | -return"",fmt.Errorf("failed to marshal JSON: %w",err) |
| 25 | +Images: []api.ImageData{imageData}, |
64 | 26 | } |
65 | 27 |
|
66 | | -// Create a new request |
67 | | -req,err:=http.NewRequest( |
68 | | -"POST", |
69 | | -OLLAMA_HOST+"/api/chat", |
70 | | -bytes.NewBuffer(jsonData), |
71 | | -) |
72 | | -iferr!=nil { |
73 | | -return"",fmt.Errorf("failed to create request: %w",err) |
| 28 | +ctx:=context.Background() |
| 29 | +req:=&api.ChatRequest{ |
| 30 | +Model:model, |
| 31 | +Messages: []api.Message{msg}, |
74 | 32 | } |
75 | 33 |
|
76 | | -// Set content-type header |
77 | | -req.Header.Set("Content-Type","application/json") |
78 | | - |
79 | | -// Make the request |
80 | | -client:=&http.Client{} |
81 | | -resp,err:=client.Do(req) |
82 | | -iferr!=nil { |
83 | | -return"",fmt.Errorf("failed to make request: %w",err) |
| 34 | +varresponse strings.Builder |
| 35 | +respFunc:=func(resp api.ChatResponse)error { |
| 36 | +response.WriteString(resp.Message.Content) |
| 37 | +returnnil |
84 | 38 | } |
85 | | -deferresp.Body.Close() |
86 | 39 |
|
87 | | -// Read the response body |
88 | | -body,err:=io.ReadAll(resp.Body) |
| 40 | +err=ol.Chat(ctx,req,respFunc) |
89 | 41 | iferr!=nil { |
90 | | -return"",fmt.Errorf("failed to read response: %w",err) |
91 | | -} |
92 | | - |
93 | | -// Check status code |
94 | | -ifresp.StatusCode!=http.StatusOK { |
95 | | -return"",fmt.Errorf("unexpected status code: %d, body: %s",resp.StatusCode,string(body)) |
96 | | -} |
97 | | - |
98 | | -// Parse the response |
99 | | -varresponseOllamaResponse |
100 | | -iferr:=json.Unmarshal(body,&response);err!=nil { |
101 | | -return"",fmt.Errorf("failed to parse response: %w",err) |
| 42 | +log.Fatal(err) |
102 | 43 | } |
103 | | -returnresponse.Message.Content,nil |
| 44 | +returnresponse.String(),nil |
104 | 45 | } |
105 | 46 |
|
106 | 47 | // ProcessImages walks through a given path and processes image files |
@@ -156,16 +97,22 @@ type Args struct { |
156 | 97 | StartCaptionstring`arg:"--start,-s" help:"Start the caption with this (image of Leela the dog,)"` |
157 | 98 | EnddCaptionstring`arg:"--end,-e" help:"End the caption with this (in the style of 'something')"` |
158 | 99 | Promptstring`arg:"--prompt,-p" help:"The prompt to use" default:"Please describe the content and style of this image in detail. Answer only with one sentence that is starting with \"A ...\""` |
| 100 | +Modelstring`arg:"--model,-m" help:"The model that will be used (must be a vision model like \"llava\")" default:"x/llama3.2-vision"` |
159 | 101 | } |
160 | 102 |
|
161 | 103 | funcmain() { |
162 | 104 | varargsArgs |
163 | 105 |
|
164 | 106 | arg.MustParse(&args) |
165 | 107 |
|
| 108 | +ol,err:=api.ClientFromEnvironment() |
| 109 | +iferr!=nil { |
| 110 | +log.Fatal(err) |
| 111 | +} |
| 112 | + |
166 | 113 | // and mention "colorized photo" |
167 | | -err:=ProcessImages(args.Path,func(pathstring,rootstring) { |
168 | | -captionText,err:=GenerateWithImage(args.Prompt,path) |
| 114 | +err=ProcessImages(args.Path,func(pathstring,rootstring) { |
| 115 | +captionText,err:=GenerateWithImage(ol,args.Model,args.Prompt,path) |
169 | 116 | iferr!=nil { |
170 | 117 | log.Fatalf("Aborting because of %v",err) |
171 | 118 | } |
|