feat: do not chunk
All checks were successful
Dependabot Auto-Merge / dependabot (pull_request) Has been skipped
Dependabot Auto-Merge / devopsbot (pull_request) Has been skipped
Dependabot Auto-Merge / rennovatebot (pull_request) Has been skipped
COMMIT LINT / commitlint (pull_request) Successful in 24s
Unit Tests / unittest (pull_request) Successful in 1m53s

This commit is contained in:
2026-06-08 14:10:21 +00:00
parent ce622505eb
commit 3f35b97e15
9 changed files with 989 additions and 732 deletions

View File

@@ -16,7 +16,7 @@ jobs:
- name: Send task to coding agent
uses: https://git.yusufali.ca/actions/embed-markdown@main
with:
api_url: ${{ secrets.AGENT_API_URL }} # default: https://api.servc.io
api_url: ${{ secrets.AGENT_API_URL }} # default: http://agent-api.k8s.private
api_token: ${{ secrets.AGENT_TOKEN }} # optional, required for authenticated API calls
route: agent-lake # default: agent-lake
debug: false

View File

@@ -13,8 +13,13 @@ jobs:
steps:
- uses: actions/checkout@v6
- name: Test Local Action
id: test
uses: ./
- uses: actions/setup-node@v4
with:
debug: 'true'
node-version: 20
cache: npm
- name: Install dependencies
run: npm ci
- name: Run unit tests
run: npm test

View File

@@ -22,9 +22,8 @@ jobs:
- name: Send task to coding agent
uses: https://git.yusufali.ca/actions/embed-markdown@main
with:
api_url: ${{ secrets.AGENT_API_URL }} # default: https://api.servc.io
api_url: ${{ secrets.AGENT_API_URL }} # default: http://agent-api.k8s.private
api_token: ${{ secrets.AGENT_TOKEN }} # optional, required for authenticated API calls
route: agent-lake # default: agent-lake
debug: false
```

View File

@@ -3,7 +3,7 @@ description: 'Sends markdown files to the lake to embed into vector form'
inputs:
api_url:
description: 'api url'
default: 'http://agents-api.servc-agents:3000'
default: 'http://agent-api.k8s.private'
required: false
api_token:
description: 'API token for authentication'

1302
dist/index.js vendored

File diff suppressed because one or more lines are too long

227
index.js
View File

@@ -4,30 +4,37 @@ const fs = require("fs");
const core = require("@actions/core");
const github = require("@actions/github");
const apiUrl =
core.getInput("api_url") || "http://agents-api.servc-agents:3000";
const apiToken = core.getInput("api_token");
const route = core.getInput("route") || "agent-lake";
const method = core.getInput("method") || "embeddings_insert";
const debug = (core.getInput("debug") || "false").toLowerCase() === "true";
/**
 * Lists the markdown files tracked by git.
 *
 * @param {Function} [executor=execSync] - Command runner. It is called with
 *   the git command string and `{ encoding: "utf8" }` and must return the
 *   command's stdout as a string.
 * @returns {string[]} One entry per non-empty line of the command output;
 *   an empty array when the command prints nothing.
 */
function listMarkdownFiles(executor = execSync) {
  const output = executor("git ls-files '*.md'", {
    encoding: "utf8",
  }).trim();

  // An empty string would otherwise split into [""], so short-circuit it.
  return output ? output.split("\n").filter(Boolean) : [];
}
function getConfig({
coreModule = core,
githubModule = github,
env = process.env,
} = {}) {
const apiUrl =
coreModule.getInput("api_url") || "http://agent-api.k8s.private";
const apiToken = coreModule.getInput("api_token");
const route = coreModule.getInput("route") || "agent-lake";
const method = coreModule.getInput("method") || "embeddings_insert";
const debug =
(coreModule.getInput("debug") || "false").toLowerCase() === "true";
const repoFull = env.GITHUB_REPOSITORY || "";
const [owner = "", repo = ""] = repoFull.split("/");
const segmentId = "docs.code";
const serverUrl = (
process.env.GITHUB_SERVER_URL ||
github.context.serverUrl ||
env.GITHUB_SERVER_URL ||
githubModule.context.serverUrl ||
"https://git.yusufali.ca"
).replace(/\/$/, "");
const markdownFiles = execSync("git ls-files '*.md'", {
encoding: "utf8",
})
.trim()
.split("\n")
.filter(Boolean);
const headers = {
"Content-Type": "application/json",
};
@@ -36,69 +43,34 @@ if (apiToken) {
headers.Apitoken = apiToken;
}
async function post(requestPayload, retries=0) {
try{
const response = await fetch(apiUrl, {
method: "POST",
return {
apiUrl,
apiToken,
route,
method,
debug,
owner,
repo,
segmentId,
serverUrl,
headers,
body: JSON.stringify(requestPayload),
});
const responseText = await response.text();
if (!response.ok) {
core.setFailed(
`Agent API request failed (${response.status}): ${responseText}`,
);
} else {
core.info(`Agent response: ${responseText}`);
}
} catch(e){
if(retries < 5){
const delayMs = 1000 * (retries + 1);
await new Promise((resolve) => setTimeout(resolve, delayMs));
return post(requestPayload, retries + 1);
}
core.setFailed(`Error sending task to agent: ${e}`)
}
};
}
for (const file of markdownFiles) {
const content = fs.readFileSync(file, "utf8").trim();
const lines = content.split(/\r?\n/);
const h1Line = lines.find((line) => /^#\s+/.test(line)) || "";
const chunks = [];
let current = [];
for (const line of lines) {
if (/^##\s+/.test(line)) {
if (current.length) {
chunks.push(current.join("\n").trim());
}
current = [line];
} else {
current.push(line);
}
/**
 * Strips every character that is not an ASCII letter, a digit or a period.
 *
 * @param {string} value - Raw identifier.
 * @returns {string} The identifier with all other characters removed.
 */
function sanitizeDocumentId(value) {
  const disallowedCharacters = /[^A-Za-z0-9.]/g;
  const cleaned = value.replace(disallowedCharacters, "");
  return cleaned;
}
if (current.length) {
chunks.push(current.join("\n").trim());
}
const normalizedChunks =
chunks.length > 0
? chunks.map((chunk) => {
let chunkLines = chunk.split(/\r?\n/);
if (h1Line && chunkLines[0] === h1Line) {
chunkLines = chunkLines.slice(1);
}
const body = chunkLines.join("\n").trim();
return [h1Line, body].filter(Boolean).join("\n");
})
: [content];
normalizedChunks.forEach((chunk, index) => {
const requestPayload = {
function buildRequestPayload({
route,
method,
segmentId,
owner,
repo,
file,
content,
}) {
return {
type: "input",
route,
argumentId: "plain",
@@ -107,24 +79,99 @@ for (const file of markdownFiles) {
inputs: {
method,
inputs: {
segment_id,
document_id: [repo, file.replace(".", ""), `part${index + 1}`].join(
".",
),
embed_text: chunk,
store_text: chunk,
segment_id: segmentId,
document_id: sanitizeDocumentId([owner, repo, file].join(".")),
embed_text: "",
store_text: content,
},
},
};
if (debug) {
core.info(`API URL: ${apiUrl}`);
core.info(`Route: ${route}`);
core.info(`Server URL: ${serverUrl}`);
core.info(`Using auth: ${Boolean(apiToken)}`);
core.info(`Request payload: ${JSON.stringify(requestPayload)}`);
}
post(requestPayload);
/**
 * POSTs a JSON payload to the agent API and reports the outcome through
 * `coreModule`.
 *
 * A thrown error (from the fetch, from reading the response body, or from the
 * reporting calls inside the attempt) is retried while the attempt counter is
 * below 5, waiting `1000 * (attempt + 1)` milliseconds first. A non-OK HTTP
 * response is reported with `setFailed` and is not retried.
 *
 * @param {object} options
 * @param {string} options.apiUrl - Endpoint to POST to.
 * @param {object} options.headers - Request headers.
 * @param {object} options.requestPayload - Value serialised as the JSON body.
 * @param {object} options.coreModule - Provides `info` and `setFailed`.
 * @param {Function} options.fetchFn - fetch-compatible function.
 * @param {Function} options.waitFn - Called with a delay in milliseconds; awaited before retrying.
 * @param {number} [retries=0] - Attempt counter to start from.
 * @returns {Promise<void>}
 */
async function post(
  { apiUrl, headers, requestPayload, coreModule, fetchFn, waitFn },
  retries = 0,
) {
  const maxRetries = 5;

  for (let attempt = retries; ; attempt += 1) {
    try {
      const response = await fetchFn(apiUrl, {
        method: "POST",
        headers,
        body: JSON.stringify(requestPayload),
      });
      const responseText = await response.text();

      if (response.ok) {
        coreModule.info(`Agent response: ${responseText}`);
      } else {
        coreModule.setFailed(
          `Agent API request failed (${response.status}): ${responseText}`,
        );
      }
      return;
    } catch (error) {
      // Out of retries: report the last error and stop.
      if (attempt >= maxRetries) {
        coreModule.setFailed(`Error sending task to agent: ${error}`);
        return;
      }
      // Linear back-off before the next attempt.
      await waitFn(1000 * (attempt + 1));
    }
  }
}
/**
 * Sends every git-tracked markdown file to the agent API, one request per
 * file, awaiting each request before starting the next.
 *
 * All collaborators are injectable so the flow can run without the real
 * Actions runtime, file system, git or network.
 *
 * @param {object} [deps]
 * @param {object} [deps.coreModule=core] - Provides `getInput`, `info`, `setFailed`.
 * @param {object} [deps.githubModule=github] - Passed through to `getConfig`.
 * @param {object} [deps.env=process.env] - Environment passed to `getConfig`.
 * @param {Function} [deps.executor=execSync] - Command runner for `listMarkdownFiles`.
 * @param {object} [deps.fsModule=fs] - Provides `readFileSync`.
 * @param {Function} [deps.fetchFn=fetch] - fetch-compatible function handed to `post`.
 * @param {Function} [deps.waitFn] - Delay function handed to `post`; defaults to a `setTimeout`-based wait.
 * @returns {Promise<void>}
 */
async function main({
  coreModule = core,
  githubModule = github,
  env = process.env,
  executor = execSync,
  fsModule = fs,
  fetchFn = fetch,
  waitFn = (delayMs) => new Promise((resolve) => setTimeout(resolve, delayMs)),
} = {}) {
  const {
    apiUrl,
    apiToken,
    route,
    method,
    debug,
    owner,
    repo,
    segmentId,
    serverUrl,
    headers,
  } = getConfig({ coreModule, githubModule, env });
  const files = listMarkdownFiles(executor);

  for (const file of files) {
    // The whole document is sent as-is; no trimming or splitting happens here.
    const requestPayload = buildRequestPayload({
      route,
      method,
      segmentId,
      owner,
      repo,
      file,
      content: fsModule.readFileSync(file, "utf8"),
    });

    if (debug) {
      const debugLines = [
        `API URL: ${apiUrl}`,
        `Route: ${route}`,
        `Server URL: ${serverUrl}`,
        `Using auth: ${Boolean(apiToken)}`,
        `Request payload: ${JSON.stringify(requestPayload)}`,
      ];
      for (const line of debugLines) {
        coreModule.info(line);
      }
    }

    await post({ apiUrl, headers, requestPayload, coreModule, fetchFn, waitFn });
  }
}
module.exports = {
buildRequestPayload,
getConfig,
listMarkdownFiles,
main,
post,
sanitizeDocumentId,
};
if (require.main === module) {
main();
}

127
index.test.js Normal file
View File

@@ -0,0 +1,127 @@
const test = require("node:test");
const assert = require("node:assert/strict");
const {
buildRequestPayload,
getConfig,
main,
sanitizeDocumentId,
} = require("./index");
test("getConfig defaults to the ingress API URL", () => {
  // Every input is reported as unset, so getConfig has to use its fallbacks.
  const unsetInputs = { getInput: () => "" };
  const githubStub = { context: { serverUrl: "https://github.example" } };

  const { apiUrl, route, method } = getConfig({
    coreModule: unsetInputs,
    githubModule: githubStub,
    env: {},
  });

  assert.equal(apiUrl, "http://agent-api.k8s.private");
  assert.equal(route, "agent-lake");
  assert.equal(method, "embeddings_insert");
});
test("sanitizeDocumentId removes special characters and preserves periods", () => {
  // Hyphen, slash, space and "!" must be dropped; the periods must survive.
  const rawId = "acme.docs-repo.guides/setup v2!.md";

  const sanitized = sanitizeDocumentId(rawId);

  assert.equal(sanitized, "acme.docsrepo.guidessetupv2.md");
});
test("buildRequestPayload sends the full document in store_text and an empty embed_text", () => {
  // A document with several "##" sections; it must come through in one piece.
  const content = "# Title\n\n## First\nalpha\n\n## Second\nbeta\n";

  const payload = buildRequestPayload({
    route: "agent-lake",
    method: "embeddings_insert",
    segmentId: "docs.acme",
    owner: "acme",
    repo: "docs-repo",
    file: "guides/setup.md",
    content,
  });
  const {
    embed_text: embedText,
    store_text: storeText,
    document_id: documentId,
  } = payload.inputs.inputs;

  assert.equal(embedText, "");
  assert.equal(storeText, content);
  assert.equal(documentId, "acme.docsrepo.guidessetup.md");
});
test("main sends one request per file even when the document contains multiple sections", async () => {
  const content = "# Title\n\n## First\nalpha\n\n## Second\nbeta\n";
  const fetchCalls = [];
  const infoMessages = [];
  const failureMessages = [];

  // Stand-in for @actions/core: serves fixed inputs and records what is reported.
  const coreStub = {
    getInput: (name) => {
      const configuredInputs = {
        api_url: "https://agents.example/api",
        api_token: "",
        route: "agent-lake",
        method: "embeddings_insert",
        debug: "false",
      };
      return configuredInputs[name] || "";
    },
    info: (message) => {
      infoMessages.push(message);
    },
    setFailed: (message) => {
      failureMessages.push(message);
    },
  };
  const githubStub = { context: { serverUrl: "https://github.example" } };

  // Stand-in for fs: there is exactly one tracked file, and it is read as UTF-8.
  const fsStub = {
    readFileSync: (file, encoding) => {
      assert.equal(file, "README.md");
      assert.equal(encoding, "utf8");
      return content;
    },
  };

  // Stand-in for fetch: records each request and always answers successfully.
  const recordingFetch = async (url, options) => {
    fetchCalls.push({ url, options, body: JSON.parse(options.body) });
    return { ok: true, text: async () => "ok" };
  };

  await main({
    coreModule: coreStub,
    githubModule: githubStub,
    env: {
      GITHUB_REPOSITORY: "acme/docs-repo",
      GITHUB_SERVER_URL: "https://github.example",
    },
    executor: () => "README.md\n",
    fsModule: fsStub,
    fetchFn: recordingFetch,
    waitFn: async () => {},
  });

  assert.equal(fetchCalls.length, 1);
  const [onlyCall] = fetchCalls;
  const sentInputs = onlyCall.body.inputs.inputs;
  assert.equal(onlyCall.url, "https://agents.example/api");
  assert.equal(sentInputs.embed_text, "");
  assert.equal(sentInputs.store_text, content);
  assert.equal(sentInputs.document_id, "acme.docsrepo.README.md");
  assert.deepEqual(failureMessages, []);
  assert.deepEqual(infoMessages, ["Agent response: ok"]);
});

View File

@@ -4,7 +4,7 @@
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
"test": "node --test"
},
"keywords": [],
"author": "",

21
seed_examples.py Normal file
View File

@@ -0,0 +1,21 @@
import yaml
from servc_agent.servc_send import process
# Load the seed definitions. The file is read for a top-level "segment" value
# and an "examples" list whose items have "id", "inputfile" and "answer" keys.
with open("examples.yaml", encoding="utf-8") as f:
    data = yaml.safe_load(f)

# Send one embeddings_insert request per example to the agent-lake route.
for obj in data["examples"]:
    input_file = obj["inputfile"]
    process(
        "agent-lake",
        "embeddings_insert",
        {
            "segment_id": data["segment"],
            "document_id": obj["id"],
            "embed_text": input_file,
            # Built from a local variable: reusing double quotes inside a
            # double-quoted f-string is a SyntaxError before Python 3.12.
            "store_text": " -> ".join([f"input: {input_file}", obj["answer"]]),
        },
        force=False,
        poll=False,
    )