refactor: flatten Microsoft skills from nested to flat directory structure
Rewrote sync_microsoft_skills.py (v4) to use each SKILL.md's frontmatter 'name' field as the flat directory name under skills/, replacing the nested skills/official/microsoft/<lang>/<category>/<service>/ hierarchy. This fixes CI failures caused by the indexing, validation, and catalog scripts expecting skills/<id>/SKILL.md (depth 1). Changes: - Rewrite scripts/sync_microsoft_skills.py for flat output with collision detection - Update scripts/tests/inspect_microsoft_repo.py for flat name mapping - Update scripts/tests/test_comprehensive_coverage.py for name uniqueness checks - Delete skills/official/ nested directory - Add 129 Microsoft skills as flat directories (e.g. skills/azure-mgmt-botservice-dotnet/) - Move attribution files to docs/ (LICENSE-MICROSOFT, microsoft-skills-attribution.json) - Rebuild skills_index.json, CATALOG.md, README.md (845 total skills)
This commit is contained in:
266
skills/azure-search-documents-ts/SKILL.md
Normal file
266
skills/azure-search-documents-ts/SKILL.md
Normal file
@@ -0,0 +1,266 @@
|
||||
---
|
||||
name: azure-search-documents-ts
|
||||
description: Build search applications using Azure AI Search SDK for JavaScript (@azure/search-documents). Use when creating/managing indexes, implementing vector/hybrid search, semantic ranking, or building agentic retrieval with knowledge bases.
|
||||
package: "@azure/search-documents"
|
||||
---
|
||||
|
||||
# Azure AI Search SDK for TypeScript
|
||||
|
||||
Build search applications with vector, hybrid, and semantic search capabilities.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
npm install @azure/search-documents @azure/identity
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
```bash
|
||||
AZURE_SEARCH_ENDPOINT=https://<service-name>.search.windows.net
|
||||
AZURE_SEARCH_INDEX_NAME=my-index
|
||||
AZURE_SEARCH_ADMIN_KEY=<admin-key> # Optional if using Entra ID
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
```typescript
|
||||
import { SearchClient, SearchIndexClient } from "@azure/search-documents";
|
||||
import { DefaultAzureCredential } from "@azure/identity";
|
||||
|
||||
const endpoint = process.env.AZURE_SEARCH_ENDPOINT!;
|
||||
const indexName = process.env.AZURE_SEARCH_INDEX_NAME!;
|
||||
const credential = new DefaultAzureCredential();
|
||||
|
||||
// For searching
|
||||
const searchClient = new SearchClient(endpoint, indexName, credential);
|
||||
|
||||
// For index management
|
||||
const indexClient = new SearchIndexClient(endpoint, credential);
|
||||
```
|
||||
|
||||
## Core Workflow
|
||||
|
||||
### Create Index with Vector Field
|
||||
|
||||
```typescript
|
||||
import { SearchIndex, SearchField, VectorSearch } from "@azure/search-documents";
|
||||
|
||||
const index: SearchIndex = {
|
||||
name: "products",
|
||||
fields: [
|
||||
{ name: "id", type: "Edm.String", key: true },
|
||||
{ name: "title", type: "Edm.String", searchable: true },
|
||||
{ name: "description", type: "Edm.String", searchable: true },
|
||||
{ name: "category", type: "Edm.String", filterable: true, facetable: true },
|
||||
{
|
||||
name: "embedding",
|
||||
type: "Collection(Edm.Single)",
|
||||
searchable: true,
|
||||
vectorSearchDimensions: 1536,
|
||||
vectorSearchProfileName: "vector-profile",
|
||||
},
|
||||
],
|
||||
vectorSearch: {
|
||||
algorithms: [
|
||||
{ name: "hnsw-algorithm", kind: "hnsw" },
|
||||
],
|
||||
profiles: [
|
||||
{ name: "vector-profile", algorithmConfigurationName: "hnsw-algorithm" },
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
await indexClient.createOrUpdateIndex(index);
|
||||
```
|
||||
|
||||
### Index Documents
|
||||
|
||||
```typescript
|
||||
const documents = [
|
||||
{ id: "1", title: "Widget", description: "A useful widget", category: "Tools", embedding: [...] },
|
||||
{ id: "2", title: "Gadget", description: "A cool gadget", category: "Electronics", embedding: [...] },
|
||||
];
|
||||
|
||||
const result = await searchClient.uploadDocuments(documents);
|
||||
console.log(`Indexed ${result.results.length} documents`);
|
||||
```
|
||||
|
||||
### Full-Text Search
|
||||
|
||||
```typescript
|
||||
const results = await searchClient.search("widget", {
|
||||
select: ["id", "title", "description"],
|
||||
filter: "category eq 'Tools'",
|
||||
orderBy: ["title asc"],
|
||||
top: 10,
|
||||
});
|
||||
|
||||
for await (const result of results.results) {
|
||||
console.log(`${result.document.title}: ${result.score}`);
|
||||
}
|
||||
```
|
||||
|
||||
### Vector Search
|
||||
|
||||
```typescript
|
||||
const queryVector = await getEmbedding("useful tool"); // Your embedding function
|
||||
|
||||
const results = await searchClient.search("*", {
|
||||
vectorSearchOptions: {
|
||||
queries: [
|
||||
{
|
||||
kind: "vector",
|
||||
vector: queryVector,
|
||||
fields: ["embedding"],
|
||||
kNearestNeighborsCount: 10,
|
||||
},
|
||||
],
|
||||
},
|
||||
select: ["id", "title", "description"],
|
||||
});
|
||||
|
||||
for await (const result of results.results) {
|
||||
console.log(`${result.document.title}: ${result.score}`);
|
||||
}
|
||||
```
|
||||
|
||||
### Hybrid Search (Text + Vector)
|
||||
|
||||
```typescript
|
||||
const queryVector = await getEmbedding("useful tool");
|
||||
|
||||
const results = await searchClient.search("tool", {
|
||||
vectorSearchOptions: {
|
||||
queries: [
|
||||
{
|
||||
kind: "vector",
|
||||
vector: queryVector,
|
||||
fields: ["embedding"],
|
||||
kNearestNeighborsCount: 50,
|
||||
},
|
||||
],
|
||||
},
|
||||
select: ["id", "title", "description"],
|
||||
top: 10,
|
||||
});
|
||||
```
|
||||
|
||||
### Semantic Search
|
||||
|
||||
```typescript
|
||||
// Index must have semantic configuration
|
||||
const index: SearchIndex = {
|
||||
name: "products",
|
||||
fields: [...],
|
||||
semanticSearch: {
|
||||
configurations: [
|
||||
{
|
||||
name: "semantic-config",
|
||||
prioritizedFields: {
|
||||
titleField: { name: "title" },
|
||||
contentFields: [{ name: "description" }],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
// Search with semantic ranking
|
||||
const results = await searchClient.search("best tool for the job", {
|
||||
queryType: "semantic",
|
||||
semanticSearchOptions: {
|
||||
configurationName: "semantic-config",
|
||||
captions: { captionType: "extractive" },
|
||||
answers: { answerType: "extractive", count: 3 },
|
||||
},
|
||||
select: ["id", "title", "description"],
|
||||
});
|
||||
|
||||
for await (const result of results.results) {
|
||||
console.log(`${result.document.title}`);
|
||||
console.log(` Caption: ${result.captions?.[0]?.text}`);
|
||||
console.log(` Reranker Score: ${result.rerankerScore}`);
|
||||
}
|
||||
```
|
||||
|
||||
## Filtering and Facets
|
||||
|
||||
```typescript
|
||||
// Filter syntax
|
||||
const results = await searchClient.search("*", {
|
||||
filter: "category eq 'Electronics' and price lt 100",
|
||||
facets: ["category,count:10", "brand"],
|
||||
});
|
||||
|
||||
// Access facets
|
||||
for (const [facetName, facetResults] of Object.entries(results.facets || {})) {
|
||||
console.log(`${facetName}:`);
|
||||
for (const facet of facetResults) {
|
||||
console.log(` ${facet.value}: ${facet.count}`);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Autocomplete and Suggestions
|
||||
|
||||
```typescript
|
||||
// Create suggester in index
|
||||
const index: SearchIndex = {
|
||||
name: "products",
|
||||
fields: [...],
|
||||
suggesters: [
|
||||
{ name: "sg", sourceFields: ["title", "description"] },
|
||||
],
|
||||
};
|
||||
|
||||
// Autocomplete
|
||||
const autocomplete = await searchClient.autocomplete("wid", "sg", {
|
||||
autocompleteMode: "twoTerms",
|
||||
top: 5,
|
||||
});
|
||||
|
||||
// Suggestions
|
||||
const suggestions = await searchClient.suggest("wid", "sg", {
|
||||
select: ["title"],
|
||||
top: 5,
|
||||
});
|
||||
```
|
||||
|
||||
## Batch Operations
|
||||
|
||||
```typescript
|
||||
// Batch upload, merge, delete
|
||||
const batch = [
|
||||
{ upload: { id: "1", title: "New Item" } },
|
||||
{ merge: { id: "2", title: "Updated Title" } },
|
||||
{ delete: { id: "3" } },
|
||||
];
|
||||
|
||||
const result = await searchClient.indexDocuments({ actions: batch });
|
||||
```
|
||||
|
||||
## Key Types
|
||||
|
||||
```typescript
|
||||
import {
|
||||
SearchClient,
|
||||
SearchIndexClient,
|
||||
SearchIndexerClient,
|
||||
SearchIndex,
|
||||
SearchField,
|
||||
SearchOptions,
|
||||
VectorSearch,
|
||||
SemanticSearch,
|
||||
SearchIterator,
|
||||
} from "@azure/search-documents";
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Use hybrid search** - Combine vector + text for best results
|
||||
2. **Enable semantic ranking** - Improves relevance for natural language queries
|
||||
3. **Batch document uploads** - Use `uploadDocuments` with arrays, not single docs
|
||||
4. **Use filters for security** - Implement document-level security with filters
|
||||
5. **Index incrementally** - Use `mergeOrUploadDocuments` for updates
|
||||
6. **Limit total-count requests** - Use `includeTotalCount: true` sparingly in production; computing the count adds query latency and the returned value is an approximation
|
||||
Reference in New Issue
Block a user