Skip to content

Commit

Permalink
Add support for JSON parameters to new server
Browse files Browse the repository at this point in the history
This uses the Redbean JSON parser adapted for C++.
  • Loading branch information
jart committed Jul 5, 2024
1 parent 1601118 commit d7c8e33
Show file tree
Hide file tree
Showing 11 changed files with 2,095 additions and 187 deletions.
6 changes: 6 additions & 0 deletions llamafile/server/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,15 @@ o/$(MODE)/llamafile/server/main: \
$(LLAMAFILE_SERVER_OBJS): llamafile/server/BUILD.mk
$(LLAMAFILE_SERVER_OBJS): private CCFLAGS += -O

# Prerequisites of the fastjson_test program: its own object file, the
# fastjson encoder object, and the double-conversion static archive.
o/$(MODE)/llamafile/server/fastjson_test: \
o/$(MODE)/llamafile/server/fastjson_test.o \
o/$(MODE)/llamafile/server/fastjson.o \
o/$(MODE)/double-conversion/double-conversion.a \

# Prerequisites of the json_test program: its own object file, the JSON
# parser object, the hextoint helper object, and the double-conversion
# static archive.
o/$(MODE)/llamafile/server/json_test: \
o/$(MODE)/llamafile/server/json_test.o \
o/$(MODE)/llamafile/server/json.o \
o/$(MODE)/llamafile/server/hextoint.o \
o/$(MODE)/double-conversion/double-conversion.a \

.PHONY: o/$(MODE)/llamafile/server
Expand Down
3 changes: 3 additions & 0 deletions llamafile/server/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
#define HeaderEqualCase(H, S) \
SlicesEqualCase(S, strlen(S), HeaderData(H), HeaderLength(H))

struct EmbeddingParams;

struct Cleanup
{
Cleanup* next;
Expand Down Expand Up @@ -85,4 +87,5 @@ struct Client
bool dispatch() __wur;
bool tokenize() __wur;
bool embedding() __wur;
bool get_embedding_params(EmbeddingParams*);
};
77 changes: 56 additions & 21 deletions llamafile/server/embedding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,19 @@

#include "llama.cpp/llama.h"

#include "fastjson.h"
#include "json.h"
#include "log.h"
#include "utils.h"

// Request parameters gathered by Client::get_embedding_params().
//
// The boolean members carry in-class defaults so that a freshly allocated
// object never holds indeterminate values; the defaults mirror the
// fallbacks used when the request does not supply the parameter.
struct EmbeddingParams
{
    // Forwarded to llama_tokenize() as its add_special argument.
    bool add_special = true;

    // Forwarded to llama_tokenize() as its parse_special argument.
    bool parse_special = false;

    // Text to embed. This is a non-owning view: it refers either to
    // request data held by the Client (query parameter or payload) or to
    // `content` below, so it must not outlive whichever one backs it.
    ctl::string_view prompt;

    // Owns the text when it was extracted from a JSON request body, in
    // which case `prompt` is a view of this string.
    ctl::string content;
};

extern llama_model* g_model;

void
Expand Down Expand Up @@ -78,56 +87,82 @@ cleanup_llama_context(void* arg)
llama_free((llama_context*)arg);
}

bool
Client::embedding()
void
cleanup_embedding_params(void* arg)
{
if (msg.method != kHttpGet && msg.method != kHttpPost)
return send_error(405);
delete (EmbeddingParams*)arg;
}

if (!read_payload())
return false;
bool
Client::get_embedding_params(EmbeddingParams* params)
{
params->add_special = atob(or_empty(param("add_special")), true);
params->parse_special = atob(or_empty(param("parse_special")), false);

// get prompt
//
// 1. Allow GET "/tokenize?prompt=foo"
// 2. Allow POST "prompt=foo" (application/x-www-form-urlencoded)
// 2. Allow POST {"content": "foo"} (application/json)
// 3. Allow POST "prompt=foo" (application/x-www-form-urlencoded)
// 4. Allow POST "foo" (text/plain)
//
ctl::string_view input;
ctl::optional<ctl::string_view> prompt = param("prompt");
if (prompt.has_value()) {
input = prompt.value();
params->prompt = prompt.value();
} else if (HasHeader(kHttpContentType)) {
if (IsMimeType(HeaderData(kHttpContentType),
HeaderLength(kHttpContentType),
"text/plain")) {
input = payload;
params->prompt = payload;
} else if (IsMimeType(HeaderData(kHttpContentType),
HeaderLength(kHttpContentType),
"application/json")) {
ctl::pair<Json::Status, Json> json = Json::parse(payload);
if (json.first != Json::success)
return send_error(400, Json::StatusToString(json.first));
if (!json.second["content"].isString())
return send_error(400, "JSON missing \"content\" key");
params->content = ctl::move(json.second["content"].getString());
params->prompt = params->content;
} else {
return send_error(501, "Content Type Not Implemented");
}
} else {
input = payload;
params->prompt = payload;
}
return true;
}

// get optional parameters
bool add_special = atob(or_empty(param("add_special")), true);
bool parse_special = atob(or_empty(param("parse_special")), false);
bool
Client::embedding()
{
if (msg.method != kHttpGet && msg.method != kHttpPost)
return send_error(405);

if (!read_payload())
return false;

// get parameters
auto params = new EmbeddingParams;
defer_cleanup(cleanup_embedding_params, params);
if (!get_embedding_params(params))
return false;

// setup statistics
rusage rustart = {};
getrusage(RUSAGE_THREAD, &rustart);
timespec started = timespec_real();

// turn text into tokens
auto toks = new ctl::vector<llama_token>(input.size() + 16);
auto toks = new ctl::vector<llama_token>(params->prompt.size() + 16);
defer_cleanup(cleanup_token_vector, toks);
int count = llama_tokenize(g_model,
input.data(),
input.size(),
params->prompt.data(),
params->prompt.size(),
&(*toks)[0],
toks->size(),
add_special,
parse_special);
params->add_special,
params->parse_special);
if (count < 0) {
LOG("llama_tokenize failed");
return send_error(405);
Expand Down Expand Up @@ -193,10 +228,10 @@ Client::embedding()
char* p = obuf.p;
p = stpcpy(p, "{\r\n");
p = stpcpy(p, " \"add_special\": ");
p = encode_bool(p, add_special);
p = encode_bool(p, params->add_special);
p = stpcpy(p, ",\n");
p = stpcpy(p, " \"parse_special\": ");
p = encode_bool(p, parse_special);
p = encode_bool(p, params->parse_special);
p = stpcpy(p, ",\n");
p = stpcpy(p, " \"tokens_provided\": ");
p = encode_json(p, toks->size());
Expand Down
181 changes: 181 additions & 0 deletions llamafile/server/fastjson.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastjson.h"

#include <cosmo.h>
#include <ctl/string.h>
#include <net/http/escape.h>

#include "double-conversion/double-to-string.h"
#include "double-conversion/utils.h"

// Escape class for each 7-bit code point, consumed by the switch in
// encode_js_string_literal():
//
//   0  copy the character through unchanged
//   1  \t      2  \n      3  \r      4  \f
//   5  \\      6  \/      7  \"
//   9  \uXXXX  (remaining control characters, DEL, and & ' < = >)
//
// Each row covers sixteen code points; the trailing comment gives the
// code point of the row's first entry.
static const char kEscapeLiteral[128] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 2, 9, 4, 3, 9, 9, // 0x00
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x10
0, 0, 7, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 6, // 0x20
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 0, // 0x30
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, // 0x50
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // 0x70
};

// Shared converter used by the float and double encode_json() overloads.
// Argument names below follow double-conversion's DoubleToStringConverter
// constructor.
static const double_conversion::DoubleToStringConverter kDoubleToJson(
    // flags
    double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN |
        double_conversion::DoubleToStringConverter::UNIQUE_ZERO,
    // infinity_symbol: presumably chosen because it is still a syntactically
    // valid JSON number -- TODO confirm intent
    "1e5000",
    // nan_symbol
    "null",
    // exponent_character
    'e',
    // decimal_in_shortest_low
    -6,
    // decimal_in_shortest_high
    21,
    // max_leading_padding_zeroes_in_precision_mode
    6,
    // max_trailing_padding_zeroes_in_precision_mode
    0);

// Writes the JSON literal "true" or "false" at `p`, NUL-terminated.
//
// @param p destination buffer; must have room for the literal and its NUL
// @param x value to encode
// @return pointer to the NUL terminator that was written
char*
encode_bool(char* p, bool x) noexcept
{
    const char* literal = "false";
    if (x)
        literal = "true";
    return stpcpy(p, literal);
}

// Encodes a signed 32-bit integer at `p` by delegating to FormatInt32().
//
// @param p destination buffer (capacity is the caller's responsibility)
// @param x value to encode
// @return whatever FormatInt32() returns -- presumably the end of the
//     written text; confirm against the cosmopolitan documentation
char*
encode_json(char* p, int x) noexcept
{
    char* const end = FormatInt32(p, x);
    return end;
}

// Encodes a signed 64-bit integer at `p` by delegating to FormatInt64().
//
// @param p destination buffer (capacity is the caller's responsibility)
// @param x value to encode
// @return whatever FormatInt64() returns -- presumably the end of the
//     written text; confirm against the cosmopolitan documentation
char*
encode_json(char* p, long x) noexcept
{
    char* const end = FormatInt64(p, x);
    return end;
}

// Encodes an unsigned 32-bit integer at `p` by delegating to FormatUint32().
//
// @param p destination buffer (capacity is the caller's responsibility)
// @param x value to encode
// @return whatever FormatUint32() returns -- presumably the end of the
//     written text; confirm against the cosmopolitan documentation
char*
encode_json(char* p, unsigned x) noexcept
{
    char* const end = FormatUint32(p, x);
    return end;
}

// Encodes an unsigned 64-bit integer at `p` by delegating to FormatUint64().
//
// @param p destination buffer (capacity is the caller's responsibility)
// @param x value to encode
// @return whatever FormatUint64() returns -- presumably the end of the
//     written text; confirm against the cosmopolitan documentation
char*
encode_json(char* p, unsigned long x) noexcept
{
    char* const end = FormatUint64(p, x);
    return end;
}

char*
encode_json(char* p, float x) noexcept
{
double_conversion::StringBuilder b(p, 256);
kDoubleToJson.ToShortestSingle(x, &b);
b.Finalize();
return p + strlen(p);
}

char*
encode_json(char* p, double x) noexcept
{
double_conversion::StringBuilder b(p, 256);
kDoubleToJson.ToShortest(x, &b);
b.Finalize();
return p + strlen(p);
}

// Encodes `s` at `p` as a double-quoted JSON string, NUL-terminated.
//
// The contents are escaped by encode_js_string_literal(); this wrapper
// only adds the surrounding quotation marks.
//
// @param p destination buffer (capacity is the caller's responsibility)
// @param s text to encode
// @return pointer to the NUL terminator that follows the closing quote
char*
encode_json(char* p, const ctl::string_view s) noexcept
{
    p[0] = '"';
    char* tail = encode_js_string_literal(p + 1, s);
    tail[0] = '"';
    tail[1] = 0;
    return tail + 1;
}

// Escapes `s` for use inside a JavaScript/JSON string literal.
//
// The input is decoded as UTF-8 where possible. ASCII characters are
// emitted according to kEscapeLiteral (copied through, written as a short
// backslash escape, or written as \uXXXX). Every code point above 127 is
// written as one or more \uXXXX escapes using the UTF-16 code units that
// EncodeUtf16() produces. A lead byte whose continuation bytes are
// missing or malformed is not merged; that byte alone is escaped by its
// own value and decoding resumes at the following byte.
//
// No surrounding quotes are written; see encode_json(char*, string_view).
//
// @param p destination buffer; capacity is the caller's responsibility
//     (each input byte produces at most six output bytes, plus the NUL)
// @param s text to escape
// @return pointer to the NUL terminator that was written
char*
encode_js_string_literal(char* p, const ctl::string_view s) noexcept
{
    for (size_t i = 0; i < s.size();) {
        wint_t x = s[i++] & 255;

        // Try to merge a multi-byte sequence into a single code point.
        if (x >= 0300) {
            wint_t a = ThomPikeByte(x);
            const size_t m = ThomPikeLen(x) - 1; // continuation bytes expected
            if (i + m <= s.size()) {
                for (size_t j = 0;;) {
                    const wint_t b = s[i + j] & 0xff;
                    if (!ThomPikeCont(b))
                        break; // malformed: keep x as the raw lead byte
                    a = ThomPikeMerge(a, b);
                    if (++j == m) {
                        x = a;
                        i += j;
                        break;
                    }
                }
            }
        }

        // Anything outside 7-bit ASCII always takes the \uXXXX path.
        switch (0 <= x && x <= 127 ? kEscapeLiteral[x] : 9) {
            case 0:
                *p++ = x;
                break;
            case 1:
                *p++ = '\\';
                *p++ = 't';
                break;
            case 2:
                *p++ = '\\';
                *p++ = 'n';
                break;
            case 3:
                *p++ = '\\';
                *p++ = 'r';
                break;
            case 4:
                *p++ = '\\';
                *p++ = 'f';
                break;
            case 5:
                *p++ = '\\';
                *p++ = '\\';
                break;
            case 6:
                *p++ = '\\';
                *p++ = '/';
                break;
            case 7:
                *p++ = '\\';
                *p++ = '"';
                break;
            case 9: {
                // w packs the UTF-16 code units 16 bits at a time, lowest
                // unit first; emit one \uXXXX escape per unit.
                uint64_t w = EncodeUtf16(x);
                do {
                    *p++ = '\\';
                    *p++ = 'u';
                    *p++ = "0123456789abcdef"[(w & 0xF000) >> 014];
                    *p++ = "0123456789abcdef"[(w & 0x0F00) >> 010];
                    *p++ = "0123456789abcdef"[(w & 0x00F0) >> 004];
                    *p++ = "0123456789abcdef"[(w & 0x000F) >> 000];
                } while ((w >>= 16));
                break;
            }
            default:
                __builtin_unreachable();
        }
    }
    *p = 0;
    return p;
}
46 changes: 46 additions & 0 deletions llamafile/server/fastjson.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <ctl/string_view.h>

// Low-level JSON encoders. Each function writes at the given pointer and
// returns a pointer to the end of what it wrote, so calls can be chained
// as `p = encode_xxx(p, value)`. None of them is given the buffer's
// capacity; the caller is responsible for providing enough room.

// Writes "true" or "false".
char*
encode_bool(char*, bool) noexcept;

// Writes a signed 32-bit integer.
char*
encode_json(char*, int) noexcept;

// Writes a signed 64-bit integer.
char*
encode_json(char*, long) noexcept;

// Writes a float using its shortest single-precision representation.
// The implementation assumes 256 bytes are available at the pointer.
char*
encode_json(char*, float) noexcept;

// Writes a double using its shortest representation.
// The implementation assumes 256 bytes are available at the pointer.
char*
encode_json(char*, double) noexcept;

// Writes an unsigned 32-bit integer.
char*
encode_json(char*, unsigned) noexcept;

// Writes an unsigned 64-bit integer.
char*
encode_json(char*, unsigned long) noexcept;

// Writes a double-quoted, escaped, NUL-terminated JSON string.
char*
encode_json(char*, const ctl::string_view) noexcept;

// Writes the escaped contents of a string literal without surrounding
// quotes, followed by a NUL terminator.
char*
encode_js_string_literal(char*, const ctl::string_view) noexcept;
Loading

0 comments on commit d7c8e33

Please sign in to comment.