Skip to content

Commit

Permalink
sId work
Browse files Browse the repository at this point in the history
  • Loading branch information
spolu committed Sep 20, 2024
1 parent 6412862 commit 170c54c
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ export async function submitAssistantBuilderForm({
dataSources: Object.values(
a.configuration.dataSourceConfigurations
).map(({ dataSourceView, selectedResources, isSelectAll }) => ({
dataSourceId: dataSourceView.dataSource.name,
dataSourceViewId: dataSourceView.sId,
workspaceId: owner.sId,
filter: {
Expand Down Expand Up @@ -124,7 +123,6 @@ export async function submitAssistantBuilderForm({
tables: Object.values(a.configuration).flatMap(
({ dataSourceView, selectedResources }) => {
return selectedResources.map((resource) => ({
dataSourceId: dataSourceView.dataSource.name,
dataSourceViewId: dataSourceView.sId,
workspaceId: owner.sId,
tableId: getTableIdForContentNode(
Expand Down Expand Up @@ -160,7 +158,6 @@ export async function submitAssistantBuilderForm({
dataSources: Object.values(
a.configuration.dataSourceConfigurations
).map(({ dataSourceView, selectedResources, isSelectAll }) => ({
dataSourceId: dataSourceView.dataSource.name,
dataSourceViewId: dataSourceView.sId,
workspaceId: owner.sId,
filter: {
Expand Down
63 changes: 38 additions & 25 deletions front/lib/api/assistant/actions/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import type { AgentActionSpecification } from "@dust-tt/types";
import type { Result } from "@dust-tt/types";
import { BaseAction, isDevelopment } from "@dust-tt/types";
import { Ok } from "@dust-tt/types";
import assert from "assert";

import { runActionStreamed } from "@app/lib/actions/server";
import { DEFAULT_RETRIEVAL_ACTION_NAME } from "@app/lib/api/assistant/actions/names";
Expand Down Expand Up @@ -393,16 +394,28 @@ export class RetrievalConfigurationServerRunner extends BaseActionConfigurationS
DustProdActionRegistry["assistant-v2-retrieval"].config
);

const uniqueDataSourceViewIds = Array.from(
new Set(actionConfiguration.dataSources.map((ds) => ds.dataSourceViewId))
);

const dataSourceViews = await DataSourceViewResource.fetchByIds(
auth,
uniqueDataSourceViewIds
);

const dataSourceViewsMap = Object.fromEntries(
dataSourceViews.map((dsv) => [dsv.sId, dsv])
);

// Handle data sources list and parents/tags filtering.
config.DATASOURCE.data_sources = actionConfiguration.dataSources.map(
(d) => ({
workspace_id:
isDevelopment() && !apiConfig.getDevelopmentDustAppsWorkspaceId()
? PRODUCTION_DUST_WORKSPACE_ID
: d.workspaceId,
// Use dataSourceViewId if it exists; otherwise, use dataSourceId.
// Note: This value is passed to the registry for lookup.
data_source_id: d.dataSourceViewId ?? d.dataSourceId,
data_source_id: d.dataSourceViewId,
})
);

Expand All @@ -417,10 +430,16 @@ export class RetrievalConfigurationServerRunner extends BaseActionConfigurationS
config.DATASOURCE.filter.parents.in_map = {};
}

// Note: We use dataSourceId here because after the registry lookup, it returns either the
// data source itself or the data source associated with the data source view.
config.DATASOURCE.filter.parents.in_map[ds.dataSourceId] =
ds.filter.parents.in;
const dsView = dataSourceViewsMap[ds.dataSourceViewId];
// This should never happen since dataSourceViews are stored by id in the
// agent_data_source_configurations table.
assert(dsView, `Data source view ${ds.dataSourceViewId} not found`);

// Note: we use the dustAPIDataSourceId here since this is what the registry lookup
// returns.
config.DATASOURCE.filter.parents.in_map[
dsView.dataSource.dustAPIDataSourceId
] = ds.filter.parents.in;
}
if (ds.filter.parents?.not) {
if (!config.DATASOURCE.filter.parents.not) {
Expand Down Expand Up @@ -486,26 +505,13 @@ export class RetrievalConfigurationServerRunner extends BaseActionConfigurationS
dataSourceView: DataSourceViewResource;
}[] = [];

const uniqueDataSourceViewIds = Array.from(
new Set(actionConfiguration.dataSources.map((ds) => ds.dataSourceViewId))
);

const dataSourceViews = await DataSourceViewResource.fetchByIds(
auth,
uniqueDataSourceViewIds
);

const dataSourceViewsMap = Object.fromEntries(
dataSourceViews.map((dsv) => [dsv.sId, dsv])
);

// This is not perfect and will be erroneous in case of two data sources with the same id from
// two different workspaces. We don't support cross workspace data sources right now. But we'll
// likely want `core` to return the `workspace_id` that was used eventually.
// TODO(spolu): make `core` return data source workspace id.
const dataSourcesIdToWorkspaceId = Object.fromEntries(
const dustAPIDataSourcesIdToDetails = Object.fromEntries(
actionConfiguration.dataSources.map((ds) => [
ds.dataSourceId,
dataSourceViewsMap[ds.dataSourceViewId].dataSource.dustAPIDataSourceId,
{
dataSourceView: dataSourceViewsMap[ds.dataSourceViewId],
workspaceId: ds.workspaceId,
Expand Down Expand Up @@ -609,12 +615,19 @@ export class RetrievalConfigurationServerRunner extends BaseActionConfigurationS
// Prepare an array of document blobs and chunks to be passed to makeNewBatch.
blobs = v.map((d, i) => {
const reference = refs[i % refs.length];
const dsDetails = dataSourcesIdToWorkspaceId[d.data_source_id];

const details = dustAPIDataSourcesIdToDetails[d.data_source_id];
assert(details, `Data source view ${d.data_source_id} not found`);

console.log(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
);
console.log(details);

return {
blob: {
dataSourceWorkspaceId: dsDetails.workspaceId,
dataSourceId: d.data_source_id,
dataSourceWorkspaceId: details.workspaceId,
dataSourceId: details.dataSourceView.sId,
sourceUrl: d.source_url,
documentId: d.document_id,
reference,
Expand All @@ -624,7 +637,7 @@ export class RetrievalConfigurationServerRunner extends BaseActionConfigurationS
retrievalActionId: action.id,
},
chunks: d.chunks,
dataSourceView: dsDetails.dataSourceView,
dataSourceView: details.dataSourceView,
};
});
}
Expand Down
10 changes: 4 additions & 6 deletions front/lib/api/assistant/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1230,16 +1230,14 @@ async function _createAgentDataSourcesConfigData(
"Can't create AgentDataSourceConfiguration for retrieval: DataSourceView not found."
);

const { dataSource } = dataSourceView;

assert(
dataSourceView.dataSource.name === dsConfig.dataSourceId,
"Can't create AgentDataSourceConfiguration for retrieval: data source view does not belong to the data source."
console.log(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
);
console.log(dataSourceView.toJSON());

return AgentDataSourceConfiguration.create(
{
dataSourceId: dataSource.id,
dataSourceId: dataSourceView.dataSource.id,
parentsIn: dsConfig.filter.parents?.in,
parentsNotIn: dsConfig.filter.parents?.not,
retrievalConfigurationId: retrievalConfigurationId,
Expand Down
3 changes: 1 addition & 2 deletions front/lib/models/assistant/actions/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ export class RetrievalDocument extends Model<
declare tags: string[];
declare score: number | null;

// TODO(VAULTS_INFRA) Make not nullable once backfilled.
// TODO(GROUPS_INFRA): backfill dataSourceViewId for all dataSources that still exist.
declare dataSourceViewId: ForeignKey<DataSourceViewModel["id"]> | null;
declare retrievalActionId: ForeignKey<AgentRetrievalAction["id"]>;

Expand Down Expand Up @@ -343,7 +343,6 @@ RetrievalDocument.belongsTo(AgentRetrievalAction, {
foreignKey: { name: "retrievalActionId", allowNull: false },
});

// TODO(VAULTS_INFRA) Set to not null once backfilled.
DataSourceViewModel.hasMany(RetrievalDocument, {
foreignKey: { allowNull: true },
onDelete: "SET NULL",
Expand Down
2 changes: 1 addition & 1 deletion front/pages/api/registry/[type]/lookup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ async function handler(
},
});
};

const {
data_source_id: dataSourceOrDataSourceViewId,
workspace_id: workspaceId,
} = req.query;

if (
typeof workspaceId !== "string" ||
typeof dataSourceOrDataSourceViewId !== "string"
Expand Down
3 changes: 0 additions & 3 deletions types/src/front/api_handlers/internal/agent_configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ const RetrievalActionConfigurationSchema = t.type({
topK: t.union([t.number, t.literal("auto")]),
dataSources: t.array(
t.type({
dataSourceId: t.string,
dataSourceViewId: t.string,
workspaceId: t.string,
filter: t.type({
Expand All @@ -87,7 +86,6 @@ const TablesQueryActionConfigurationSchema = t.type({
type: t.literal("tables_query_configuration"),
tables: t.array(
t.type({
dataSourceId: t.string,
dataSourceViewId: t.string,
tableId: t.string,
workspaceId: t.string,
Expand All @@ -107,7 +105,6 @@ const ProcessActionConfigurationSchema = t.type({
type: t.literal("process_configuration"),
dataSources: t.array(
t.type({
dataSourceId: t.string,
dataSourceViewId: t.string,
workspaceId: t.string,
filter: t.type({
Expand Down
1 change: 0 additions & 1 deletion types/src/front/assistant/actions/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ export type DataSourceFilter = {

export type DataSourceConfiguration = {
workspaceId: string;
dataSourceId: string;
dataSourceViewId: string;
filter: DataSourceFilter;
};
Expand Down

0 comments on commit 170c54c

Please sign in to comment.