Skip to content

Commit b5ca84f

Browse files
authoredSep 5, 2024··
feat: Return computed distance and set distance thresholds on VectorQueries (#2090)
* Return computed distance and set distance thresholds on VectorQueries
1 parent 4fcefde commit b5ca84f

File tree

9 files changed

+1365
-418
lines changed

9 files changed

+1365
-418
lines changed
 

‎api-report/firestore.api.md

+97-73
Large diffs are not rendered by default.

‎dev/src/index.ts

+1
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ export type {AggregateQuery} from './reference/aggregate-query';
103103
export type {AggregateQuerySnapshot} from './reference/aggregate-query-snapshot';
104104
export type {VectorQuery} from './reference/vector-query';
105105
export type {VectorQuerySnapshot} from './reference/vector-query-snapshot';
106+
export type {VectorQueryOptions} from './reference/vector-query-options';
106107
export {BulkWriter} from './bulk-writer';
107108
export type {BulkWriterError} from './bulk-writer';
108109
export type {BundleBuilder} from './bundle';

‎dev/src/reference/query.ts

+69-11
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,9 @@ export class Query<
629629
* @param options - Options that control the vector query. `limit` specifies the upper bound of documents to return and must
630630
* be a positive integer with a maximum value of 1000. `distanceMeasure` specifies what type of distance is calculated
631631
* when performing the query.
632+
*
633+
* @deprecated Use the new {@link findNearest} implementation
634+
* accepting a single `options` param.
632635
*/
633636
findNearest(
634637
vectorField: string | firestore.FieldPath,
@@ -637,30 +640,85 @@ export class Query<
637640
limit: number;
638641
distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT';
639642
}
643+
): VectorQuery<AppModelType, DbModelType>;
644+
645+
/**
646+
* Returns a query that can perform vector distance (similarity) search with given parameters.
647+
*
648+
* The returned query, when executed, performs a distance (similarity) search on the specified
649+
* `vectorField` against the given `queryVector` and returns the top documents that are closest
650+
* to the `queryVector`.
651+
*
652+
* Only documents whose `vectorField` field is a {@link VectorValue} of the same dimension as `queryVector`
653+
* participate in the query, all other documents are ignored.
654+
*
655+
* @example
656+
* ```
657+
* // Returns the 10 documents whose 'embedding' fields are closest, by Euclidean distance, to [41, 42].
658+
* const vectorQuery = col.findNearest({
659+
* vectorField: 'embedding',
660+
* queryVector: [41, 42],
661+
* limit: 10,
662+
* distanceMeasure: 'EUCLIDEAN',
663+
* distanceResultField: 'distance',
664+
* distanceThreshold: 0.125
665+
* });
666+
*
667+
* const querySnapshot = await vectorQuery.get();
668+
* querySnapshot.forEach(...);
669+
* ```
670+
* @param options - An argument specifying the behavior of the {@link VectorQuery} returned by this function.
671+
* See {@link VectorQueryOptions}.
672+
*/
673+
findNearest(
674+
options: VectorQueryOptions
675+
): VectorQuery<AppModelType, DbModelType>;
676+
677+
findNearest(
678+
vectorFieldOrOptions: string | firestore.FieldPath | VectorQueryOptions,
679+
queryVector?: firestore.VectorValue | Array<number>,
680+
options?: {
681+
limit?: number;
682+
distanceMeasure?: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT';
683+
}
640684
): VectorQuery<AppModelType, DbModelType> {
641-
validateFieldPath('vectorField', vectorField);
685+
if (
686+
typeof vectorFieldOrOptions === 'string' ||
687+
vectorFieldOrOptions instanceof FieldPath
688+
) {
689+
const vqOptions: VectorQueryOptions = {
690+
distanceMeasure: options!.distanceMeasure!,
691+
limit: options!.limit!,
692+
queryVector: queryVector!,
693+
vectorField: vectorFieldOrOptions,
694+
};
695+
return this._findNearest(vqOptions);
696+
} else {
697+
return this._findNearest(vectorFieldOrOptions as VectorQueryOptions);
698+
}
699+
}
700+
701+
_findNearest(
702+
options: VectorQueryOptions
703+
): VectorQuery<AppModelType, DbModelType> {
704+
validateFieldPath('vectorField', options.vectorField);
642705

643706
if (options.limit <= 0) {
644-
throw invalidArgumentMessage('options.limit', 'positive limit number');
707+
throw invalidArgumentMessage('limit', 'positive limit number');
645708
}
646709

647710
if (
648-
(Array.isArray(queryVector)
649-
? queryVector.length
650-
: queryVector.toArray().length) === 0
711+
(Array.isArray(options.queryVector)
712+
? options.queryVector.length
713+
: options.queryVector.toArray().length) === 0
651714
) {
652715
throw invalidArgumentMessage(
653716
'queryVector',
654717
'vector size must be larger than 0'
655718
);
656719
}
657720

658-
return new VectorQuery<AppModelType, DbModelType>(
659-
this,
660-
vectorField,
661-
queryVector,
662-
new VectorQueryOptions(options.limit, options.distanceMeasure)
663-
);
721+
return new VectorQuery<AppModelType, DbModelType>(this, options);
664722
}
665723

666724
/**

‎dev/src/reference/vector-query-options.ts

+42-17
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,48 @@
1414
* limitations under the License.
1515
*/
1616

17-
export class VectorQueryOptions {
18-
constructor(
19-
readonly limit: number,
20-
readonly distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT'
21-
) {}
17+
import * as firestore from '@google-cloud/firestore';
2218

23-
isEqual(other: VectorQueryOptions): boolean {
24-
if (this === other) {
25-
return true;
26-
}
27-
if (!(other instanceof VectorQueryOptions)) {
28-
return false;
29-
}
19+
/**
20+
* Specifies the behavior of the {@link VectorQuery} generated by a call to {@link Query.findNearest}.
21+
*/
22+
export interface VectorQueryOptions {
23+
/**
24+
* A string or {@link FieldPath} specifying the vector field to search on.
25+
*/
26+
vectorField: string | firestore.FieldPath;
27+
28+
/**
29+
* The {@link VectorValue} used to measure the distance from `vectorField` values in the documents.
30+
*/
31+
queryVector: firestore.VectorValue | Array<number>;
32+
33+
/**
34+
* Specifies the upper bound of documents to return. Must be a positive integer with a maximum value of 1000.
35+
*/
36+
limit: number;
37+
38+
/**
39+
* Specifies what type of distance is calculated when performing the query.
40+
*/
41+
distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT';
42+
43+
/**
44+
* Optionally specifies the name of a field that will be set on each returned DocumentSnapshot,
45+
* which will contain the computed distance for the document.
46+
*/
47+
distanceResultField?: string | firestore.FieldPath;
3048

31-
return (
32-
this.limit === other.limit &&
33-
this.distanceMeasure === other.distanceMeasure
34-
);
35-
}
49+
/**
50+
* Specifies a similarity threshold: documents less similar than the threshold are not returned. The behavior
51+
* of the specified `distanceMeasure` will affect the meaning of the distance threshold.
52+
*
53+
* - For `distanceMeasure: "EUCLIDEAN"`, the meaning of `distanceThreshold` is:
54+
* SELECT docs WHERE euclidean_distance <= distanceThreshold
55+
* - For `distanceMeasure: "COSINE"`, the meaning of `distanceThreshold` is:
56+
* SELECT docs WHERE cosine_distance <= distanceThreshold
57+
* - For `distanceMeasure: "DOT_PRODUCT"`, the meaning of `distanceThreshold` is:
58+
* SELECT docs WHERE dot_product_distance >= distanceThreshold
59+
*/
60+
distanceThreshold?: number;
3661
}

‎dev/src/reference/vector-query.ts

+38-17
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,7 @@ export class VectorQuery<
5656
*/
5757
constructor(
5858
private readonly _query: Query<AppModelType, DbModelType>,
59-
private readonly vectorField: string | firestore.FieldPath,
60-
private readonly queryVector: firestore.VectorValue | Array<number>,
61-
private readonly options: VectorQueryOptions
59+
private readonly _options: VectorQueryOptions
6260
) {
6361
this._queryUtil = new QueryUtil<
6462
AppModelType,
@@ -79,19 +77,31 @@ export class VectorQuery<
7977
* @internal
8078
*/
8179
private get _rawVectorField(): string {
82-
return typeof this.vectorField === 'string'
83-
? this.vectorField
84-
: this.vectorField.toString();
80+
return typeof this._options.vectorField === 'string'
81+
? this._options.vectorField
82+
: this._options.vectorField.toString();
83+
}
84+
85+
/**
86+
* @private
87+
* @internal
88+
*/
89+
private get _rawDistanceResultField(): string | undefined {
90+
if (typeof this._options.distanceResultField === 'undefined') return;
91+
92+
return typeof this._options.distanceResultField === 'string'
93+
? this._options.distanceResultField
94+
: this._options.distanceResultField.toString();
8595
}
8696

8797
/**
8898
* @private
8999
* @internal
90100
*/
91101
private get _rawQueryVector(): Array<number> {
92-
return Array.isArray(this.queryVector)
93-
? this.queryVector
94-
: this.queryVector.toArray();
102+
return Array.isArray(this._options.queryVector)
103+
? this._options.queryVector
104+
: this._options.queryVector.toArray();
95105
}
96106

97107
/**
@@ -157,7 +167,7 @@ export class VectorQuery<
157167
}
158168

159169
/**
160-
* Internal method for serializing a query to its RunAggregationQuery proto
170+
* Internal method for serializing a query to its proto
161171
* representation with an optional transaction id.
162172
*
163173
* @private
@@ -170,17 +180,25 @@ export class VectorQuery<
170180
): api.IRunQueryRequest {
171181
const queryProto = this._query.toProto(transactionOrReadTime);
172182

173-
const queryVector = Array.isArray(this.queryVector)
174-
? new VectorValue(this.queryVector)
175-
: (this.queryVector as VectorValue);
183+
const queryVector = Array.isArray(this._options.queryVector)
184+
? new VectorValue(this._options.queryVector)
185+
: (this._options.queryVector as VectorValue);
176186

177187
queryProto.structuredQuery!.findNearest = {
178-
limit: {value: this.options.limit},
179-
distanceMeasure: this.options.distanceMeasure,
188+
limit: {value: this._options.limit},
189+
distanceMeasure: this._options.distanceMeasure,
180190
vectorField: {
181-
fieldPath: FieldPath.fromArgument(this.vectorField).formattedName,
191+
fieldPath: FieldPath.fromArgument(this._options.vectorField)
192+
.formattedName,
182193
},
183194
queryVector: queryVector._toProto(this._query._serializer),
195+
distanceResultField: this._options?.distanceResultField
196+
? FieldPath.fromArgument(this._options.distanceResultField!)
197+
.formattedName
198+
: undefined,
199+
distanceThreshold: this._options?.distanceThreshold
200+
? {value: this._options?.distanceThreshold}
201+
: undefined,
184202
};
185203

186204
if (explainOptions) {
@@ -253,7 +271,10 @@ export class VectorQuery<
253271
return (
254272
this._rawVectorField === other._rawVectorField &&
255273
isPrimitiveArrayEqual(this._rawQueryVector, other._rawQueryVector) &&
256-
this.options.isEqual(other.options)
274+
this._options.limit === other._options.limit &&
275+
this._options.distanceMeasure === other._options.distanceMeasure &&
276+
this._options.distanceThreshold === other._options.distanceThreshold &&
277+
this._rawDistanceResultField === other._rawDistanceResultField
257278
);
258279
}
259280
}

‎dev/system-test/firestore.ts

+718-255
Large diffs are not rendered by default.

‎dev/test/vector-query.ts

+311-36
Large diffs are not rendered by default.

‎package.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@
7373
"@google-cloud/trace-agent": "^8.0.0",
7474
"@googleapis/cloudtrace": "^1.1.2",
7575
"@google-cloud/cloud-rad": "^0.4.0",
76-
"@opentelemetry/sdk-trace-node": "^1.24.1",
77-
"@opentelemetry/context-async-hooks": "^1.24.1",
7876
"@google-cloud/opentelemetry-cloud-trace-exporter": "^2.0.0",
77+
"@opentelemetry/context-async-hooks": "^1.24.1",
78+
"@opentelemetry/sdk-trace-node": "^1.24.1",
7979
"@types/assert": "^1.4.0",
8080
"@types/chai": "^4.2.7",
8181
"@types/chai-as-promised": "^7.1.2",

‎types/firestore.d.ts

+87-7
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
// Declare a global (ambient) namespace
2222
// (used when not using import statement, but just script include).
23+
2324
declare namespace FirebaseFirestore {
2425
/** Alias for `any` but used where a Firestore field value would be provided. */
2526
export type DocumentFieldValue = any;
@@ -2035,23 +2036,26 @@ declare namespace FirebaseFirestore {
20352036
* `vectorField` against the given `queryVector` and returns the top documents that are closest
20362037
* to the `queryVector`.
20372038
*
2038-
* Only documents whose `vectorField` field is a `VectorValue` of the same dimension as `queryVector`
2039+
* Only documents whose `vectorField` field is a {@link VectorValue} of the same dimension as `queryVector`
20392040
* participate in the query, all other documents are ignored.
20402041
*
20412042
* @example
2042-
* ```typescript
2043+
* ```
20432044
* // Returns the 10 documents whose 'embedding' fields are closest, by Euclidean distance, to [41, 42].
20442045
* const vectorQuery = col.findNearest('embedding', [41, 42], {limit: 10, distanceMeasure: 'EUCLIDEAN'});
20452046
*
20462047
* const querySnapshot = await vectorQuery.get();
20472048
* querySnapshot.forEach(...);
20482049
* ```
20492050
*
2050-
* @param vectorField The field path this vector query executes on.
2051-
* @param queryVector The vector value used to measure the distance from `vectorField` values in the documents.
2052-
* @param options Options control the vector query. `limit` specifies the upper bound of documents to return, must
2053-
* be a positive integer with a maximum value of 1000. `distanceMeasure` specifies what type of distance is
2054-
* calculated when performing the query.
2051+
* @param vectorField - A string or {@link FieldPath} specifying the vector field to search on.
2052+
* @param queryVector - The {@link VectorValue} used to measure the distance from `vectorField` values in the documents.
2053+
* @param options - Options that control the vector query. `limit` specifies the upper bound of documents to return and must
2054+
* be a positive integer with a maximum value of 1000. `distanceMeasure` specifies what type of distance is calculated
2055+
* when performing the query.
2056+
*
2057+
* @deprecated Use the new {@link findNearest} implementation
2058+
* accepting a single `options` param.
20552059
*/
20562060
findNearest(
20572061
vectorField: string | FieldPath,
@@ -2062,6 +2066,38 @@ declare namespace FirebaseFirestore {
20622066
}
20632067
): VectorQuery<AppModelType, DbModelType>;
20642068

2069+
/**
2070+
* Returns a query that can perform vector distance (similarity) search with given parameters.
2071+
*
2072+
* The returned query, when executed, performs a distance (similarity) search on the specified
2073+
* `vectorField` against the given `queryVector` and returns the top documents that are closest
2074+
* to the `queryVector`.
2075+
*
2076+
* Only documents whose `vectorField` field is a {@link VectorValue} of the same dimension as `queryVector`
2077+
* participate in the query, all other documents are ignored.
2078+
*
2079+
* @example
2080+
* ```
2081+
* // Returns the 10 documents whose 'embedding' fields are closest, by Euclidean distance, to [41, 42].
2082+
* const vectorQuery = col.findNearest({
2083+
* vectorField: 'embedding',
2084+
* queryVector: [41, 42],
2085+
* limit: 10,
2086+
* distanceMeasure: 'EUCLIDEAN',
2087+
* distanceResultField: 'distance',
2088+
* distanceThreshold: 0.125
2089+
* });
2090+
*
2091+
* const querySnapshot = await vectorQuery.get();
2092+
* querySnapshot.forEach(...);
2093+
* ```
2094+
* @param options - An argument specifying the behavior of the {@link VectorQuery} returned by this function.
2095+
* See {@link VectorQueryOptions}.
2096+
*/
2097+
findNearest(
2098+
options: VectorQueryOptions
2099+
): VectorQuery<AppModelType, DbModelType>;
2100+
20652101
/**
20662102
* Returns true if this `Query` is equal to the provided one.
20672103
*
@@ -3192,6 +3228,50 @@ declare namespace FirebaseFirestore {
31923228
*/
31933229
readonly snapshot: T | null;
31943230
}
3231+
3232+
/**
3233+
* Specifies the behavior of the {@link VectorQuery} generated by a call to {@link Query.findNearest}.
3234+
*/
3235+
export interface VectorQueryOptions {
3236+
/**
3237+
* A string or {@link FieldPath} specifying the vector field to search on.
3238+
*/
3239+
vectorField: string | FieldPath;
3240+
3241+
/**
3242+
* The {@link VectorValue} used to measure the distance from `vectorField` values in the documents.
3243+
*/
3244+
queryVector: VectorValue | Array<number>;
3245+
3246+
/**
3247+
* Specifies the upper bound of documents to return. Must be a positive integer with a maximum value of 1000.
3248+
*/
3249+
limit: number;
3250+
3251+
/**
3252+
* Specifies what type of distance is calculated when performing the query.
3253+
*/
3254+
distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT';
3255+
3256+
/**
3257+
* Optionally specifies the name of a field that will be set on each returned DocumentSnapshot,
3258+
* which will contain the computed distance for the document.
3259+
*/
3260+
distanceResultField?: string | FieldPath;
3261+
3262+
/**
3263+
* Specifies a similarity threshold: documents less similar than the threshold are not returned. The behavior
3264+
* of the specified `distanceMeasure` will affect the meaning of the distance threshold.
3265+
*
3266+
* - For `distanceMeasure: "EUCLIDEAN"`, the meaning of `distanceThreshold` is:
3267+
* SELECT docs WHERE euclidean_distance <= distanceThreshold
3268+
* - For `distanceMeasure: "COSINE"`, the meaning of `distanceThreshold` is:
3269+
* SELECT docs WHERE cosine_distance <= distanceThreshold
3270+
* - For `distanceMeasure: "DOT_PRODUCT"`, the meaning of `distanceThreshold` is:
3271+
* SELECT docs WHERE dot_product_distance >= distanceThreshold
3272+
*/
3273+
distanceThreshold?: number;
3274+
}
31953275
}
31963276

31973277
declare module '@google-cloud/firestore' {

0 commit comments

Comments
 (0)
Please sign in to comment.