feat: do not chunk
All checks were successful
Dependabot Auto-Merge / dependabot (pull_request) Has been skipped
Dependabot Auto-Merge / devopsbot (pull_request) Has been skipped
Dependabot Auto-Merge / rennovatebot (pull_request) Has been skipped
COMMIT LINT / commitlint (pull_request) Successful in 24s
Unit Tests / unittest (pull_request) Successful in 1m53s
All checks were successful
Dependabot Auto-Merge / dependabot (pull_request) Has been skipped
Dependabot Auto-Merge / devopsbot (pull_request) Has been skipped
Dependabot Auto-Merge / rennovatebot (pull_request) Has been skipped
COMMIT LINT / commitlint (pull_request) Successful in 24s
Unit Tests / unittest (pull_request) Successful in 1m53s
This commit is contained in:
2
.github/workflows/agent-markdown.yml
vendored
2
.github/workflows/agent-markdown.yml
vendored
@@ -16,7 +16,7 @@ jobs:
|
|||||||
- name: Send task to coding agent
|
- name: Send task to coding agent
|
||||||
uses: https://git.yusufali.ca/actions/embed-markdown@main
|
uses: https://git.yusufali.ca/actions/embed-markdown@main
|
||||||
with:
|
with:
|
||||||
api_url: ${{ secrets.AGENT_API_URL }} # default: https://api.servc.io
|
api_url: ${{ secrets.AGENT_API_URL }} # default: http://agent-api.k8s.private
|
||||||
api_token: ${{ secrets.AGENT_TOKEN }} # optional, required for authenticated API calls
|
api_token: ${{ secrets.AGENT_TOKEN }} # optional, required for authenticated API calls
|
||||||
route: agent-lake # default: agent-lake
|
route: agent-lake # default: agent-lake
|
||||||
debug: false
|
debug: false
|
||||||
13
.github/workflows/test.yml
vendored
13
.github/workflows/test.yml
vendored
@@ -13,8 +13,13 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Test Local Action
|
- uses: actions/setup-node@v4
|
||||||
id: test
|
|
||||||
uses: ./
|
|
||||||
with:
|
with:
|
||||||
debug: 'true'
|
node-version: 20
|
||||||
|
cache: npm
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Run unit tests
|
||||||
|
run: npm test
|
||||||
|
|||||||
@@ -22,9 +22,8 @@ jobs:
|
|||||||
- name: Send task to coding agent
|
- name: Send task to coding agent
|
||||||
uses: https://git.yusufali.ca/actions/embed-markdown@main
|
uses: https://git.yusufali.ca/actions/embed-markdown@main
|
||||||
with:
|
with:
|
||||||
api_url: ${{ secrets.AGENT_API_URL }} # default: https://api.servc.io
|
api_url: ${{ secrets.AGENT_API_URL }} # default: http://agent-api.k8s.private
|
||||||
api_token: ${{ secrets.AGENT_TOKEN }} # optional, required for authenticated API calls
|
api_token: ${{ secrets.AGENT_TOKEN }} # optional, required for authenticated API calls
|
||||||
route: agent-lake # default: agent-lake
|
route: agent-lake # default: agent-lake
|
||||||
debug: false
|
debug: false
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ description: 'Sends markdown files to the lake to embed into vector form'
|
|||||||
inputs:
|
inputs:
|
||||||
api_url:
|
api_url:
|
||||||
description: 'api url'
|
description: 'api url'
|
||||||
default: 'http://agents-api.servc-agents:3000'
|
default: 'http://agent-api.k8s.private'
|
||||||
required: false
|
required: false
|
||||||
api_token:
|
api_token:
|
||||||
description: 'API token for authentication'
|
description: 'API token for authentication'
|
||||||
|
|||||||
1302
dist/index.js
vendored
1302
dist/index.js
vendored
File diff suppressed because one or more lines are too long
227
index.js
227
index.js
@@ -4,30 +4,37 @@ const fs = require("fs");
|
|||||||
const core = require("@actions/core");
|
const core = require("@actions/core");
|
||||||
const github = require("@actions/github");
|
const github = require("@actions/github");
|
||||||
|
|
||||||
const apiUrl =
|
function listMarkdownFiles(executor = execSync) {
|
||||||
core.getInput("api_url") || "http://agents-api.servc-agents:3000";
|
const output = executor("git ls-files '*.md'", {
|
||||||
const apiToken = core.getInput("api_token");
|
encoding: "utf8",
|
||||||
const route = core.getInput("route") || "agent-lake";
|
}).trim();
|
||||||
const method = core.getInput("method") || "embeddings_insert";
|
|
||||||
const debug = (core.getInput("debug") || "false").toLowerCase() === "true";
|
|
||||||
|
|
||||||
const repoFull = process.env.GITHUB_REPOSITORY;
|
return output ? output.split("\n").filter(Boolean) : [];
|
||||||
const [owner, repo] = repoFull.split("/");
|
}
|
||||||
const segment_id = ["docs", owner].join(".");
|
|
||||||
|
function getConfig({
|
||||||
|
coreModule = core,
|
||||||
|
githubModule = github,
|
||||||
|
env = process.env,
|
||||||
|
} = {}) {
|
||||||
|
const apiUrl =
|
||||||
|
coreModule.getInput("api_url") || "http://agent-api.k8s.private";
|
||||||
|
const apiToken = coreModule.getInput("api_token");
|
||||||
|
const route = coreModule.getInput("route") || "agent-lake";
|
||||||
|
const method = coreModule.getInput("method") || "embeddings_insert";
|
||||||
|
const debug =
|
||||||
|
(coreModule.getInput("debug") || "false").toLowerCase() === "true";
|
||||||
|
|
||||||
|
const repoFull = env.GITHUB_REPOSITORY || "";
|
||||||
|
const [owner = "", repo = ""] = repoFull.split("/");
|
||||||
|
const segmentId = "docs.code";
|
||||||
|
|
||||||
const serverUrl = (
|
const serverUrl = (
|
||||||
process.env.GITHUB_SERVER_URL ||
|
env.GITHUB_SERVER_URL ||
|
||||||
github.context.serverUrl ||
|
githubModule.context.serverUrl ||
|
||||||
"https://git.yusufali.ca"
|
"https://git.yusufali.ca"
|
||||||
).replace(/\/$/, "");
|
).replace(/\/$/, "");
|
||||||
|
|
||||||
const markdownFiles = execSync("git ls-files '*.md'", {
|
|
||||||
encoding: "utf8",
|
|
||||||
})
|
|
||||||
.trim()
|
|
||||||
.split("\n")
|
|
||||||
.filter(Boolean);
|
|
||||||
|
|
||||||
const headers = {
|
const headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
};
|
};
|
||||||
@@ -36,69 +43,34 @@ if (apiToken) {
|
|||||||
headers.Apitoken = apiToken;
|
headers.Apitoken = apiToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function post(requestPayload, retries=0) {
|
return {
|
||||||
try{
|
apiUrl,
|
||||||
const response = await fetch(apiUrl, {
|
apiToken,
|
||||||
method: "POST",
|
route,
|
||||||
|
method,
|
||||||
|
debug,
|
||||||
|
owner,
|
||||||
|
repo,
|
||||||
|
segmentId,
|
||||||
|
serverUrl,
|
||||||
headers,
|
headers,
|
||||||
body: JSON.stringify(requestPayload),
|
};
|
||||||
});
|
|
||||||
|
|
||||||
const responseText = await response.text();
|
|
||||||
if (!response.ok) {
|
|
||||||
core.setFailed(
|
|
||||||
`Agent API request failed (${response.status}): ${responseText}`,
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
core.info(`Agent response: ${responseText}`);
|
|
||||||
}
|
|
||||||
} catch(e){
|
|
||||||
if(retries < 5){
|
|
||||||
const delayMs = 1000 * (retries + 1);
|
|
||||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
||||||
return post(requestPayload, retries + 1);
|
|
||||||
}
|
|
||||||
core.setFailed(`Error sending task to agent: ${e}`)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const file of markdownFiles) {
|
function sanitizeDocumentId(value) {
|
||||||
const content = fs.readFileSync(file, "utf8").trim();
|
return value.replace(/[^A-Za-z0-9.]/g, "");
|
||||||
const lines = content.split(/\r?\n/);
|
|
||||||
const h1Line = lines.find((line) => /^#\s+/.test(line)) || "";
|
|
||||||
|
|
||||||
const chunks = [];
|
|
||||||
let current = [];
|
|
||||||
|
|
||||||
for (const line of lines) {
|
|
||||||
if (/^##\s+/.test(line)) {
|
|
||||||
if (current.length) {
|
|
||||||
chunks.push(current.join("\n").trim());
|
|
||||||
}
|
|
||||||
current = [line];
|
|
||||||
} else {
|
|
||||||
current.push(line);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current.length) {
|
function buildRequestPayload({
|
||||||
chunks.push(current.join("\n").trim());
|
route,
|
||||||
}
|
method,
|
||||||
|
segmentId,
|
||||||
const normalizedChunks =
|
owner,
|
||||||
chunks.length > 0
|
repo,
|
||||||
? chunks.map((chunk) => {
|
file,
|
||||||
let chunkLines = chunk.split(/\r?\n/);
|
content,
|
||||||
if (h1Line && chunkLines[0] === h1Line) {
|
}) {
|
||||||
chunkLines = chunkLines.slice(1);
|
return {
|
||||||
}
|
|
||||||
const body = chunkLines.join("\n").trim();
|
|
||||||
return [h1Line, body].filter(Boolean).join("\n");
|
|
||||||
})
|
|
||||||
: [content];
|
|
||||||
|
|
||||||
normalizedChunks.forEach((chunk, index) => {
|
|
||||||
const requestPayload = {
|
|
||||||
type: "input",
|
type: "input",
|
||||||
route,
|
route,
|
||||||
argumentId: "plain",
|
argumentId: "plain",
|
||||||
@@ -107,24 +79,99 @@ for (const file of markdownFiles) {
|
|||||||
inputs: {
|
inputs: {
|
||||||
method,
|
method,
|
||||||
inputs: {
|
inputs: {
|
||||||
segment_id,
|
segment_id: segmentId,
|
||||||
document_id: [repo, file.replace(".", ""), `part${index + 1}`].join(
|
document_id: sanitizeDocumentId([owner, repo, file].join(".")),
|
||||||
".",
|
embed_text: "",
|
||||||
),
|
store_text: content,
|
||||||
embed_text: chunk,
|
|
||||||
store_text: chunk,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
if (debug) {
|
|
||||||
core.info(`API URL: ${apiUrl}`);
|
|
||||||
core.info(`Route: ${route}`);
|
|
||||||
core.info(`Server URL: ${serverUrl}`);
|
|
||||||
core.info(`Using auth: ${Boolean(apiToken)}`);
|
|
||||||
core.info(`Request payload: ${JSON.stringify(requestPayload)}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
post(requestPayload);
|
/**
 * POSTs one request payload to the agent API and reports the outcome.
 *
 * A thrown error (from `fetchFn`, from reading the response body, or from
 * reporting the result) is retried with a linearly growing delay of
 * `1000 * (attempt + 1)` ms. A response that arrives but is not `ok` is
 * reported through `setFailed` and is NOT retried.
 *
 * @param {object} options
 * @param {string} options.apiUrl - URL the payload is POSTed to.
 * @param {object} options.headers - Request headers passed to `fetchFn`.
 * @param {object} options.requestPayload - Value serialized as the JSON body.
 * @param {object} [options.coreModule] - Object exposing `info` and `setFailed`;
 *   defaults to the file-level `core` module.
 * @param {Function} [options.fetchFn] - fetch-compatible function; defaults to
 *   the global `fetch`.
 * @param {Function} [options.waitFn] - `(delayMs) => Promise` used between
 *   attempts; defaults to a `setTimeout`-based sleep.
 * @param {number} [options.maxRetries=5] - Number of retries after the first
 *   failed attempt.
 * @param {number} [retries=0] - Attempt counter to start from.
 * @returns {Promise<void>} Resolves once the request was reported as
 *   succeeded or failed.
 */
async function post(
  {
    apiUrl,
    headers,
    requestPayload,
    coreModule = core,
    fetchFn = fetch,
    waitFn = (delayMs) =>
      new Promise((resolve) => setTimeout(resolve, delayMs)),
    maxRetries = 5,
  },
  retries = 0,
) {
  for (let attempt = retries; ; attempt += 1) {
    try {
      const response = await fetchFn(apiUrl, {
        method: "POST",
        headers,
        body: JSON.stringify(requestPayload),
      });

      const responseText = await response.text();
      if (!response.ok) {
        coreModule.setFailed(
          `Agent API request failed (${response.status}): ${responseText}`,
        );
      } else {
        coreModule.info(`Agent response: ${responseText}`);
      }
      return;
    } catch (error) {
      // Out of retries: report the last error instead of throwing so the
      // caller can continue with its remaining work.
      if (attempt >= maxRetries) {
        coreModule.setFailed(`Error sending task to agent: ${error}`);
        return;
      }
      await waitFn(1000 * (attempt + 1));
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Sends every markdown file returned by `listMarkdownFiles` to the agent API,
 * one request per file, with the whole file as the payload content.
 *
 * All collaborators are injectable so the function can be exercised without
 * touching git, the filesystem or the network.
 *
 * @param {object} [deps]
 * @param {object} [deps.coreModule] - Provides `getInput`, `info`, `setFailed`.
 * @param {object} [deps.githubModule] - Passed through to `getConfig`.
 * @param {object} [deps.env] - Environment variables passed to `getConfig`.
 * @param {Function} [deps.executor] - Command runner passed to
 *   `listMarkdownFiles`.
 * @param {object} [deps.fsModule] - Provides `readFileSync`.
 * @param {Function} [deps.fetchFn] - fetch-compatible function handed to `post`.
 * @param {Function} [deps.waitFn] - Delay function handed to `post`.
 * @returns {Promise<void>} Resolves after every file has been attempted.
 */
async function main({
  coreModule = core,
  githubModule = github,
  env = process.env,
  executor = execSync,
  fsModule = fs,
  fetchFn = fetch,
  waitFn = (delayMs) => new Promise((resolve) => setTimeout(resolve, delayMs)),
} = {}) {
  const config = getConfig({ coreModule, githubModule, env });
  const markdownFiles = listMarkdownFiles(executor);

  // These values are identical for every file, so log them once instead of
  // repeating them on each loop iteration.
  if (config.debug) {
    coreModule.info(`API URL: ${config.apiUrl}`);
    coreModule.info(`Route: ${config.route}`);
    coreModule.info(`Server URL: ${config.serverUrl}`);
    coreModule.info(`Using auth: ${Boolean(config.apiToken)}`);
  }

  for (const file of markdownFiles) {
    let content;
    try {
      content = fsModule.readFileSync(file, "utf8");
    } catch (error) {
      // Report the unreadable file and keep going, mirroring how `post`
      // reports a failed request without aborting the remaining files.
      coreModule.setFailed(`Unable to read ${file}: ${error}`);
      continue;
    }

    const requestPayload = buildRequestPayload({
      route: config.route,
      method: config.method,
      segmentId: config.segmentId,
      owner: config.owner,
      repo: config.repo,
      file,
      content,
    });

    if (config.debug) {
      coreModule.info(`Request payload: ${JSON.stringify(requestPayload)}`);
    }

    // Requests are awaited one at a time, so files are sent sequentially.
    await post({
      apiUrl: config.apiUrl,
      headers: config.headers,
      requestPayload,
      coreModule,
      fetchFn,
      waitFn,
    });
  }
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
buildRequestPayload,
|
||||||
|
getConfig,
|
||||||
|
listMarkdownFiles,
|
||||||
|
main,
|
||||||
|
post,
|
||||||
|
sanitizeDocumentId,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (require.main === module) {
|
||||||
|
main();
|
||||||
|
}
|
||||||
|
|||||||
127
index.test.js
Normal file
127
index.test.js
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
const test = require("node:test");
|
||||||
|
const assert = require("node:assert/strict");
|
||||||
|
|
||||||
|
const {
|
||||||
|
buildRequestPayload,
|
||||||
|
getConfig,
|
||||||
|
main,
|
||||||
|
sanitizeDocumentId,
|
||||||
|
} = require("./index");
|
||||||
|
|
||||||
|
// With every action input blank and an empty environment, getConfig must
// fall back to its built-in defaults.
test("getConfig defaults to the ingress API URL", () => {
  const blankInputs = { getInput: () => "" };
  const stubGithub = { context: { serverUrl: "https://github.example" } };

  const { apiUrl, route, method } = getConfig({
    coreModule: blankInputs,
    githubModule: stubGithub,
    env: {},
  });

  assert.equal(apiUrl, "http://agent-api.k8s.private");
  assert.equal(route, "agent-lake");
  assert.equal(method, "embeddings_insert");
});
|
||||||
|
|
||||||
|
// Hyphens, slashes, spaces and punctuation are dropped; letters, digits and
// periods survive.
test("sanitizeDocumentId removes special characters and preserves periods", () => {
  const rawId = "acme.docs-repo.guides/setup v2!.md";
  const cleanedId = sanitizeDocumentId(rawId);

  assert.equal(cleanedId, "acme.docsrepo.guidessetupv2.md");
});
|
||||||
|
|
||||||
|
// The payload carries the untouched document in store_text, leaves embed_text
// empty, and derives document_id from owner, repo and file path.
test("buildRequestPayload sends the full document in store_text and an empty embed_text", () => {
  const content = "# Title\n\n## First\nalpha\n\n## Second\nbeta\n";
  const args = {
    route: "agent-lake",
    method: "embeddings_insert",
    segmentId: "docs.acme",
    owner: "acme",
    repo: "docs-repo",
    file: "guides/setup.md",
    content,
  };

  const payload = buildRequestPayload(args);

  assert.equal(payload.inputs.inputs.embed_text, "");
  assert.equal(payload.inputs.inputs.store_text, content);
  assert.equal(
    payload.inputs.inputs.document_id,
    "acme.docsrepo.guidessetup.md",
  );
});
|
||||||
|
|
||||||
|
// End-to-end run of main() with every collaborator stubbed: a single markdown
// file with several sections must produce exactly one API request.
test("main sends one request per file even when the document contains multiple sections", async () => {
  const calls = [];
  const infos = [];
  const failures = [];
  const content = "# Title\n\n## First\nalpha\n\n## Second\nbeta\n";

  // Action inputs and log sinks.
  const coreStub = {
    getInput(name) {
      const inputs = {
        api_url: "https://agents.example/api",
        api_token: "",
        route: "agent-lake",
        method: "embeddings_insert",
        debug: "false",
      };
      return inputs[name] || "";
    },
    info: (message) => {
      infos.push(message);
    },
    setFailed: (message) => {
      failures.push(message);
    },
  };

  // Filesystem stub that also verifies how the file is read.
  const fsStub = {
    readFileSync(file, encoding) {
      assert.equal(file, "README.md");
      assert.equal(encoding, "utf8");
      return content;
    },
  };

  // Records each request and answers with a successful response.
  const recordingFetch = async (url, options) => {
    calls.push({
      url,
      options,
      body: JSON.parse(options.body),
    });

    return {
      ok: true,
      text: async () => "ok",
    };
  };

  await main({
    coreModule: coreStub,
    githubModule: {
      context: {
        serverUrl: "https://github.example",
      },
    },
    env: {
      GITHUB_REPOSITORY: "acme/docs-repo",
      GITHUB_SERVER_URL: "https://github.example",
    },
    executor: () => "README.md\n",
    fsModule: fsStub,
    fetchFn: recordingFetch,
    waitFn: async () => {},
  });

  assert.equal(calls.length, 1);

  const [firstCall] = calls;
  assert.equal(firstCall.url, "https://agents.example/api");
  assert.equal(firstCall.body.inputs.inputs.embed_text, "");
  assert.equal(firstCall.body.inputs.inputs.store_text, content);
  assert.equal(
    firstCall.body.inputs.inputs.document_id,
    "acme.docsrepo.README.md",
  );
  assert.deepEqual(failures, []);
  assert.deepEqual(infos, ["Agent response: ok"]);
});
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "node --test"
|
||||||
},
|
},
|
||||||
"keywords": [],
|
"keywords": [],
|
||||||
"author": "",
|
"author": "",
|
||||||
|
|||||||
21
seed_examples.py
Normal file
21
seed_examples.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
"""Submit every example listed in examples.yaml via ``process``.

The script reads ``segment`` and ``examples`` from examples.yaml and issues one
``embeddings_insert`` call on the ``agent-lake`` route per example.
"""
import yaml
from servc_agent.servc_send import process

# Read the file as UTF-8 explicitly so the result does not depend on the
# platform's default encoding; safe_load can consume the open stream directly.
with open("examples.yaml", encoding="utf-8") as f:
    data = yaml.safe_load(f)

for obj in data["examples"]:
    process(
        "agent-lake",
        "embeddings_insert",
        {
            "segment_id": data["segment"],
            "document_id": obj["id"],
            "embed_text": obj["inputfile"],
            # Single quotes inside the f-string: reusing the enclosing double
            # quote is a SyntaxError on Python versions before 3.12.
            "store_text": " -> ".join(
                [f"input: {obj['inputfile']}", obj["answer"]]
            ),
        },
        # NOTE(review): the meaning of force/poll is defined by servc_agent and
        # is not visible here - confirm against that package.
        force=False,
        poll=False,
    )
|
||||||
Reference in New Issue
Block a user