feat: do not chunk
Some checks failed
Dependabot Auto-Merge / dependabot (pull_request) Has been skipped
Dependabot Auto-Merge / devopsbot (pull_request) Has been skipped
Dependabot Auto-Merge / rennovatebot (pull_request) Has been skipped
COMMIT LINT / commitlint (pull_request) Successful in 38s
Unit Tests / unittest (pull_request) Failing after 1m0s
Some checks failed
Dependabot Auto-Merge / dependabot (pull_request) Has been skipped
Dependabot Auto-Merge / devopsbot (pull_request) Has been skipped
Dependabot Auto-Merge / rennovatebot (pull_request) Has been skipped
COMMIT LINT / commitlint (pull_request) Successful in 38s
Unit Tests / unittest (pull_request) Failing after 1m0s
This commit is contained in:
1302
dist/index.js
vendored
1302
dist/index.js
vendored
File diff suppressed because one or more lines are too long
247
index.js
247
index.js
@@ -4,41 +4,96 @@ const fs = require("fs");
|
||||
const core = require("@actions/core");
|
||||
const github = require("@actions/github");
|
||||
|
||||
const apiUrl =
|
||||
core.getInput("api_url") || "http://agents-api.servc-agents:3000";
|
||||
const apiToken = core.getInput("api_token");
|
||||
const route = core.getInput("route") || "agent-lake";
|
||||
const method = core.getInput("method") || "embeddings_insert";
|
||||
const debug = (core.getInput("debug") || "false").toLowerCase() === "true";
|
||||
function listMarkdownFiles(executor = execSync) {
|
||||
const output = executor("git ls-files '*.md'", {
|
||||
encoding: "utf8",
|
||||
}).trim();
|
||||
|
||||
const repoFull = process.env.GITHUB_REPOSITORY;
|
||||
const [owner, repo] = repoFull.split("/");
|
||||
const segment_id = ["docs", owner].join(".");
|
||||
|
||||
const serverUrl = (
|
||||
process.env.GITHUB_SERVER_URL ||
|
||||
github.context.serverUrl ||
|
||||
"https://git.yusufali.ca"
|
||||
).replace(/\/$/, "");
|
||||
|
||||
const markdownFiles = execSync("git ls-files '*.md'", {
|
||||
encoding: "utf8",
|
||||
})
|
||||
.trim()
|
||||
.split("\n")
|
||||
.filter(Boolean);
|
||||
|
||||
const headers = {
|
||||
"Content-Type": "application/json",
|
||||
};
|
||||
|
||||
if (apiToken) {
|
||||
headers.Apitoken = apiToken;
|
||||
return output ? output.split("\n").filter(Boolean) : [];
|
||||
}
|
||||
|
||||
async function post(requestPayload, retries=0) {
|
||||
try{
|
||||
const response = await fetch(apiUrl, {
|
||||
/**
 * Resolve the action's runtime configuration from its inputs and environment.
 *
 * Every collaborator is injectable so the function can be exercised without
 * the real GitHub Actions toolkit.
 *
 * @param {object} [options]
 * @param {object} [options.coreModule=core] - Object exposing `getInput(name)`.
 * @param {object} [options.githubModule=github] - Object exposing `context.serverUrl`.
 * @param {object} [options.env=process.env] - Source of `GITHUB_REPOSITORY` and
 *   `GITHUB_SERVER_URL`.
 * @returns {object} `{ apiUrl, apiToken, route, method, debug, owner, repo,
 *   segmentId, serverUrl, headers }`.
 */
function getConfig({
  coreModule = core,
  githubModule = github,
  env = process.env,
} = {}) {
  // An empty input string falls back to the default, hence `||` rather than `??`.
  const apiUrl =
    coreModule.getInput("api_url") || "http://agents-api.servc-agents:3000";
  const apiToken = coreModule.getInput("api_token");
  const route = coreModule.getInput("route") || "agent-lake";
  const method = coreModule.getInput("method") || "embeddings_insert";
  const debug =
    (coreModule.getInput("debug") || "false").toLowerCase() === "true";

  // GITHUB_REPOSITORY has the form "owner/repo"; both parts default to "" when
  // the variable is missing or has no slash.
  const repoFull = env.GITHUB_REPOSITORY || "";
  const [owner = "", repo = ""] = repoFull.split("/");
  const segmentId = "docs.code";

  // Strip every trailing slash (not just one) so "https://host//" normalizes
  // to "https://host".
  const serverUrl = (
    env.GITHUB_SERVER_URL ||
    githubModule.context.serverUrl ||
    "https://git.yusufali.ca"
  ).replace(/\/+$/, "");

  const headers = {
    "Content-Type": "application/json",
  };

  // The auth header is only attached when a token was supplied.
  if (apiToken) {
    headers.Apitoken = apiToken;
  }

  return {
    apiUrl,
    apiToken,
    route,
    method,
    debug,
    owner,
    repo,
    segmentId,
    serverUrl,
    headers,
  };
}
|
||||
|
||||
/**
 * Reduce a candidate document id to ASCII letters, digits and periods.
 *
 * NOTE(review): path separators, dashes and spaces are removed rather than
 * replaced, so distinct inputs such as "a/b.md" and "ab.md" yield the same
 * id — confirm this is acceptable for the target store.
 *
 * @param {*} value - Raw id; non-strings are coerced, null/undefined become "".
 * @returns {string} The id with every character outside [A-Za-z0-9.] removed.
 */
function sanitizeDocumentId(value) {
  // Coerce first so a missing or numeric value cannot throw on `.replace`.
  return String(value ?? "").replace(/[^A-Za-z0-9.]/g, "");
}
|
||||
|
||||
/**
 * Build the request body that submits one whole file as a single document.
 *
 * The full file content goes into `store_text`; `embed_text` is sent empty.
 *
 * @param {object} params
 * @param {string} params.route - Value for the top-level `route` field.
 * @param {string} params.method - Value for `inputs.method`.
 * @param {string} params.segmentId - Value for `inputs.inputs.segment_id`.
 * @param {string} params.owner - Repository owner; may be empty.
 * @param {string} params.repo - Repository name; may be empty.
 * @param {string} params.file - Path of the file being submitted.
 * @param {string} params.content - Full text of the file.
 * @returns {object} The request payload, ready for JSON serialization.
 */
function buildRequestPayload({
  route,
  method,
  segmentId,
  owner,
  repo,
  file,
  content,
}) {
  // Skip empty parts so a missing owner/repo does not produce an id with
  // leading or doubled periods (e.g. "..README.md").
  const rawDocumentId = [owner, repo, file].filter(Boolean).join(".");

  return {
    type: "input",
    route,
    argumentId: "plain",
    force: true,
    instanceId: null,
    inputs: {
      method,
      inputs: {
        segment_id: segmentId,
        document_id: sanitizeDocumentId(rawDocumentId),
        embed_text: "",
        store_text: content,
      },
    },
  };
}
|
||||
|
||||
async function post(
|
||||
{ apiUrl, headers, requestPayload, coreModule, fetchFn, waitFn },
|
||||
retries = 0,
|
||||
) {
|
||||
try {
|
||||
const response = await fetchFn(apiUrl, {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: JSON.stringify(requestPayload),
|
||||
@@ -46,85 +101,77 @@ async function post(requestPayload, retries=0) {
|
||||
|
||||
const responseText = await response.text();
|
||||
if (!response.ok) {
|
||||
core.setFailed(
|
||||
coreModule.setFailed(
|
||||
`Agent API request failed (${response.status}): ${responseText}`,
|
||||
);
|
||||
} else {
|
||||
core.info(`Agent response: ${responseText}`);
|
||||
coreModule.info(`Agent response: ${responseText}`);
|
||||
}
|
||||
} catch(e){
|
||||
if(retries < 5){
|
||||
} catch (error) {
|
||||
if (retries < 5) {
|
||||
const delayMs = 1000 * (retries + 1);
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
return post(requestPayload, retries + 1);
|
||||
await waitFn(delayMs);
|
||||
return post(
|
||||
{ apiUrl, headers, requestPayload, coreModule, fetchFn, waitFn },
|
||||
retries + 1,
|
||||
);
|
||||
}
|
||||
core.setFailed(`Error sending task to agent: ${e}`)
|
||||
coreModule.setFailed(`Error sending task to agent: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
for (const file of markdownFiles) {
|
||||
const content = fs.readFileSync(file, "utf8").trim();
|
||||
const lines = content.split(/\r?\n/);
|
||||
const h1Line = lines.find((line) => /^#\s+/.test(line)) || "";
|
||||
async function main({
|
||||
coreModule = core,
|
||||
githubModule = github,
|
||||
env = process.env,
|
||||
executor = execSync,
|
||||
fsModule = fs,
|
||||
fetchFn = fetch,
|
||||
waitFn = (delayMs) => new Promise((resolve) => setTimeout(resolve, delayMs)),
|
||||
} = {}) {
|
||||
const config = getConfig({ coreModule, githubModule, env });
|
||||
const markdownFiles = listMarkdownFiles(executor);
|
||||
|
||||
const chunks = [];
|
||||
let current = [];
|
||||
for (const file of markdownFiles) {
|
||||
const content = fsModule.readFileSync(file, "utf8");
|
||||
const requestPayload = buildRequestPayload({
|
||||
route: config.route,
|
||||
method: config.method,
|
||||
segmentId: config.segmentId,
|
||||
owner: config.owner,
|
||||
repo: config.repo,
|
||||
file,
|
||||
content,
|
||||
});
|
||||
|
||||
for (const line of lines) {
|
||||
if (/^##\s+/.test(line)) {
|
||||
if (current.length) {
|
||||
chunks.push(current.join("\n").trim());
|
||||
}
|
||||
current = [line];
|
||||
} else {
|
||||
current.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
if (current.length) {
|
||||
chunks.push(current.join("\n").trim());
|
||||
}
|
||||
|
||||
const normalizedChunks =
|
||||
chunks.length > 0
|
||||
? chunks.map((chunk) => {
|
||||
let chunkLines = chunk.split(/\r?\n/);
|
||||
if (h1Line && chunkLines[0] === h1Line) {
|
||||
chunkLines = chunkLines.slice(1);
|
||||
}
|
||||
const body = chunkLines.join("\n").trim();
|
||||
return [h1Line, body].filter(Boolean).join("\n");
|
||||
})
|
||||
: [content];
|
||||
|
||||
normalizedChunks.forEach((chunk, index) => {
|
||||
const requestPayload = {
|
||||
type: "input",
|
||||
route,
|
||||
argumentId: "plain",
|
||||
force: true,
|
||||
instanceId: null,
|
||||
inputs: {
|
||||
method,
|
||||
inputs: {
|
||||
segment_id,
|
||||
document_id: [repo, file.replace(".", ""), `part${index + 1}`].join(
|
||||
".",
|
||||
),
|
||||
embed_text: chunk,
|
||||
store_text: chunk,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
if (debug) {
|
||||
core.info(`API URL: ${apiUrl}`);
|
||||
core.info(`Route: ${route}`);
|
||||
core.info(`Server URL: ${serverUrl}`);
|
||||
core.info(`Using auth: ${Boolean(apiToken)}`);
|
||||
core.info(`Request payload: ${JSON.stringify(requestPayload)}`);
|
||||
if (config.debug) {
|
||||
coreModule.info(`API URL: ${config.apiUrl}`);
|
||||
coreModule.info(`Route: ${config.route}`);
|
||||
coreModule.info(`Server URL: ${config.serverUrl}`);
|
||||
coreModule.info(`Using auth: ${Boolean(config.apiToken)}`);
|
||||
coreModule.info(`Request payload: ${JSON.stringify(requestPayload)}`);
|
||||
}
|
||||
|
||||
post(requestPayload);
|
||||
});
|
||||
await post({
|
||||
apiUrl: config.apiUrl,
|
||||
headers: config.headers,
|
||||
requestPayload,
|
||||
coreModule,
|
||||
fetchFn,
|
||||
waitFn,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
buildRequestPayload,
|
||||
getConfig,
|
||||
listMarkdownFiles,
|
||||
main,
|
||||
post,
|
||||
sanitizeDocumentId,
|
||||
};
|
||||
|
||||
// Run the action only when this file is executed directly, not when it is
// required (e.g. by the unit tests). main() is async, so its rejection is
// reported through core.setFailed instead of being left as a floating promise.
if (require.main === module) {
  main().catch((error) => {
    core.setFailed(`Unhandled error while processing markdown files: ${error}`);
  });
}
|
||||
|
||||
102
index.test.js
Normal file
102
index.test.js
Normal file
@@ -0,0 +1,102 @@
|
||||
const test = require("node:test");
|
||||
const assert = require("node:assert/strict");
|
||||
|
||||
const { buildRequestPayload, main, sanitizeDocumentId } = require("./index");
|
||||
|
||||
test("sanitizeDocumentId removes special characters and preserves periods", () => {
|
||||
assert.equal(
|
||||
sanitizeDocumentId("acme.docs-repo.guides/setup v2!.md"),
|
||||
"acme.docsrepo.guidessetupv2.md",
|
||||
);
|
||||
});
|
||||
|
||||
test("buildRequestPayload sends the full document in store_text and an empty embed_text", () => {
|
||||
const content = "# Title\n\n## First\nalpha\n\n## Second\nbeta\n";
|
||||
const payload = buildRequestPayload({
|
||||
route: "agent-lake",
|
||||
method: "embeddings_insert",
|
||||
segmentId: "docs.acme",
|
||||
owner: "acme",
|
||||
repo: "docs-repo",
|
||||
file: "guides/setup.md",
|
||||
content,
|
||||
});
|
||||
|
||||
assert.equal(payload.inputs.inputs.embed_text, "");
|
||||
assert.equal(payload.inputs.inputs.store_text, content);
|
||||
assert.equal(
|
||||
payload.inputs.inputs.document_id,
|
||||
"acme.docsrepo.guidessetup.md",
|
||||
);
|
||||
});
|
||||
|
||||
test("main sends one request per file even when the document contains multiple sections", async () => {
|
||||
const calls = [];
|
||||
const infos = [];
|
||||
const failures = [];
|
||||
const content = "# Title\n\n## First\nalpha\n\n## Second\nbeta\n";
|
||||
|
||||
await main({
|
||||
coreModule: {
|
||||
getInput(name) {
|
||||
const inputs = {
|
||||
api_url: "https://agents.example/api",
|
||||
api_token: "",
|
||||
route: "agent-lake",
|
||||
method: "embeddings_insert",
|
||||
debug: "false",
|
||||
};
|
||||
return inputs[name] || "";
|
||||
},
|
||||
info(message) {
|
||||
infos.push(message);
|
||||
},
|
||||
setFailed(message) {
|
||||
failures.push(message);
|
||||
},
|
||||
},
|
||||
githubModule: {
|
||||
context: {
|
||||
serverUrl: "https://github.example",
|
||||
},
|
||||
},
|
||||
env: {
|
||||
GITHUB_REPOSITORY: "acme/docs-repo",
|
||||
GITHUB_SERVER_URL: "https://github.example",
|
||||
},
|
||||
executor() {
|
||||
return "README.md\n";
|
||||
},
|
||||
fsModule: {
|
||||
readFileSync(file, encoding) {
|
||||
assert.equal(file, "README.md");
|
||||
assert.equal(encoding, "utf8");
|
||||
return content;
|
||||
},
|
||||
},
|
||||
fetchFn: async (url, options) => {
|
||||
calls.push({
|
||||
url,
|
||||
options,
|
||||
body: JSON.parse(options.body),
|
||||
});
|
||||
|
||||
return {
|
||||
ok: true,
|
||||
text: async () => "ok",
|
||||
};
|
||||
},
|
||||
waitFn: async () => {},
|
||||
});
|
||||
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].url, "https://agents.example/api");
|
||||
assert.equal(calls[0].body.inputs.inputs.embed_text, "");
|
||||
assert.equal(calls[0].body.inputs.inputs.store_text, content);
|
||||
assert.equal(
|
||||
calls[0].body.inputs.inputs.document_id,
|
||||
"acme.docsrepo.README.md",
|
||||
);
|
||||
assert.deepEqual(failures, []);
|
||||
assert.deepEqual(infos, ["Agent response: ok"]);
|
||||
});
|
||||
@@ -4,7 +4,7 @@
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
"test": "node --test"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
|
||||
21
seed_examples.py
Normal file
21
seed_examples.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import yaml
|
||||
from servc_agent.servc_send import process
|
||||
|
||||
# Load the example definitions; safe_load accepts the open stream directly.
with open("examples.yaml", encoding="utf-8") as f:
    data = yaml.safe_load(f)

# Call process() once per example with the "embeddings_insert" method on the
# "agent-lake" route.
for obj in data["examples"]:
    process(
        "agent-lake",
        "embeddings_insert",
        {
            "segment_id": data["segment"],
            "document_id": obj["id"],
            "embed_text": obj["inputfile"],
            # Single quotes inside the f-string: reusing the enclosing double
            # quotes is a SyntaxError on Python versions before 3.12.
            "store_text": " -> ".join(
                [f"input: {obj['inputfile']}", obj["answer"]]
            ),
        },
        force=False,
        poll=False,
    )
|
||||
Reference in New Issue
Block a user