Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 72f930a

Browse files
authored
Introduce Microsoft.Extensions.DataIngestion.Abstractions (#6949)
1 parent 1523a41, commit 72f930a

File tree

12 files changed

+730
-0
lines changed

12 files changed

+730
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
usingSystem;
5+
usingSystem.Collections.Generic;
6+
usingSystem.Diagnostics;
7+
usingMicrosoft.Shared.Diagnostics;
8+
9+
namespaceMicrosoft.Extensions.DataIngestion;
10+
11+
/// <summary>
/// Represents a chunk of content extracted from an <see cref="IngestionDocument"/>.
/// </summary>
/// <typeparam name="T">The type of the content.</typeparam>
[DebuggerDisplay("Content = {Content}")]
public sealed class IngestionChunk<T>
{
    // Created on first access to Metadata; HasMetadata reads it without forcing that allocation.
    private Dictionary<string, object>? _metadata;

    /// <summary>
    /// Initializes a new instance of the <see cref="IngestionChunk{T}"/> class.
    /// </summary>
    /// <param name="content">The content of the chunk.</param>
    /// <param name="document">The document from which this chunk was extracted.</param>
    /// <param name="context">Additional context for the chunk.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="content"/> or <paramref name="document"/> is <see langword="null"/>.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <typeparamref name="T"/> is <see cref="string"/> and <paramref name="content"/> is an empty string.
    /// </exception>
    /// <remarks>
    /// String content is validated with <c>Throw.IfNullOrEmpty</c>, which is a null-or-empty check;
    /// no white-space check is performed by this constructor.
    /// </remarks>
    public IngestionChunk(T content, IngestionDocument document, string? context = null)
    {
        if (typeof(T) == typeof(string))
        {
            // T is known to be string here; the round trip through object is what lets the
            // compiler accept the string-specific validation in a generic context.
            Content = (T)(object)Throw.IfNullOrEmpty((string)(object)content!);
        }
        else
        {
            Content = Throw.IfNull(content);
        }

        Document = Throw.IfNull(document);
        Context = context;
    }

    /// <summary>
    /// Gets the content of the chunk.
    /// </summary>
    public T Content { get; }

    /// <summary>
    /// Gets the document from which this chunk was extracted.
    /// </summary>
    public IngestionDocument Document { get; }

    /// <summary>
    /// Gets additional context for the chunk.
    /// </summary>
    public string? Context { get; }

    /// <summary>
    /// Gets a value indicating whether this chunk has metadata.
    /// </summary>
    /// <remarks>
    /// Returns <see langword="false"/> when the metadata dictionary has not been created yet or is empty.
    /// </remarks>
    public bool HasMetadata => _metadata is { Count: > 0 };

    /// <summary>
    /// Gets the metadata associated with this chunk.
    /// </summary>
    /// <remarks>
    /// The backing dictionary is created the first time this property is read.
    /// </remarks>
    public IDictionary<string, object> Metadata => _metadata ??= [];
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
usingSystem.Collections.Generic;
5+
usingSystem.Threading;
6+
7+
namespaceMicrosoft.Extensions.DataIngestion;
8+
9+
/// <summary>
/// Base class for a pipeline stage that takes a stream of chunks and produces a stream of chunks.
/// </summary>
/// <typeparam name="T">The type of the chunk content.</typeparam>
public abstract class IngestionChunkProcessor<T>
{
    /// <summary>
    /// Asynchronously processes the supplied chunks.
    /// </summary>
    /// <param name="chunks">The chunks to process.</param>
    /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
    /// <returns>The processed chunks.</returns>
    public abstract IAsyncEnumerable<IngestionChunk<T>> ProcessAsync(
        IAsyncEnumerable<IngestionChunk<T>> chunks,
        CancellationToken cancellationToken = default);
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
usingSystem;
5+
usingSystem.Collections.Generic;
6+
usingSystem.Threading;
7+
usingSystem.Threading.Tasks;
8+
9+
namespaceMicrosoft.Extensions.DataIngestion;
10+
11+
/// <summary>
/// Base class for components that write chunks to a destination.
/// </summary>
/// <typeparam name="T">The type of the chunk content.</typeparam>
public abstract class IngestionChunkWriter<T> : IDisposable
{
    /// <summary>
    /// Asynchronously writes the supplied chunks.
    /// </summary>
    /// <param name="chunks">The chunks to write.</param>
    /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
    /// <returns>A task representing the asynchronous write operation.</returns>
    public abstract Task WriteAsync(
        IAsyncEnumerable<IngestionChunk<T>> chunks,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Disposes the writer and releases all associated resources.
    /// </summary>
    public void Dispose()
    {
        // Run the overridable cleanup first, then tell the GC that finalization is not needed.
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases resources held by the writer. The base implementation does nothing.
    /// </summary>
    /// <param name="disposing">
    /// <see langword="true"/> if called from <see cref="Dispose()"/>;
    /// <see langword="false"/> if called from a finalizer.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
    }
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
usingSystem.Collections.Generic;
5+
usingSystem.Threading;
6+
7+
namespaceMicrosoft.Extensions.DataIngestion;
8+
9+
/// <summary>
/// Base class for components that split an <see cref="IngestionDocument"/> into chunks.
/// </summary>
/// <typeparam name="T">The type of the chunk content.</typeparam>
public abstract class IngestionChunker<T>
{
    /// <summary>
    /// Asynchronously splits a document into chunks.
    /// </summary>
    /// <param name="document">The document to split.</param>
    /// <param name="cancellationToken">The token to monitor for cancellation requests.</param>
    /// <returns>The chunks created from the document.</returns>
    public abstract IAsyncEnumerable<IngestionChunk<T>> ProcessAsync(
        IngestionDocument document,
        CancellationToken cancellationToken = default);
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
usingSystem;
5+
usingSystem.Collections.Generic;
6+
usingMicrosoft.Shared.Diagnostics;
7+
8+
namespaceMicrosoft.Extensions.DataIngestion;
9+
10+
/// <summary>
/// A format-agnostic container that normalizes diverse input formats into a structured hierarchy.
/// </summary>
public sealed class IngestionDocument
{
    /// <summary>
    /// Initializes a new instance of the <see cref="IngestionDocument"/> class.
    /// </summary>
    /// <param name="identifier">The unique identifier for the document.</param>
    /// <exception cref="ArgumentNullException"><paramref name="identifier"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException"><paramref name="identifier"/> is an empty string.</exception>
    public IngestionDocument(string identifier)
    {
        Identifier = Throw.IfNullOrEmpty(identifier);
    }

    /// <summary>
    /// Gets the unique identifier for the document.
    /// </summary>
    public string Identifier { get; }

    /// <summary>
    /// Gets the sections of the document.
    /// </summary>
    public IList<IngestionDocumentSection> Sections { get; } = [];

    /// <summary>
    /// Enumerates all elements in the document, including those in nested sections.
    /// </summary>
    /// <returns>An enumerable collection of elements.</returns>
    /// <remarks>
    /// Sections themselves are not included; only their non-section descendants are returned.
    /// Elements are produced depth-first, in the order they appear within <see cref="Sections"/>
    /// and within each section's element list.
    /// </remarks>
    public IEnumerable<IngestionDocumentElement> EnumerateContent()
    {
        // An explicit stack is used instead of recursion to walk the nested sections.
        Stack<IngestionDocumentElement> pending = new();

        // Push in reverse so that the first section ends up on top of the stack
        // and is therefore visited first.
        for (int sectionIndex = Sections.Count - 1; sectionIndex >= 0; sectionIndex--)
        {
            pending.Push(Sections[sectionIndex]);
        }

        while (pending.Count > 0)
        {
            IngestionDocumentElement current = pending.Pop();

            if (current is IngestionDocumentSection section)
            {
                // Expand the section in place: its children (again reversed) replace it
                // on the stack, which keeps the overall output in document order.
                for (int i = section.Elements.Count - 1; i >= 0; i--)
                {
                    pending.Push(section.Elements[i]);
                }
            }
            else
            {
                yield return current;
            }
        }
    }
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp