You've already forked dokumenta-semantiska-analize
Import UAPF package
v1.0.0: dev.uapf.semantic-document-analysis
UAPF v1.1 SSOT-conformant Level 4 process package — reusable semantic document analysis, shareable across DMS / intake / mailroom systems.

Structure:
- uapf.yaml (kind: uapf.package, level 4) + manifest.json (engine compatibility)
- bpmn/semantic-document-analysis.bpmn.xml — 3 service tasks invoking the reserved UAPF-IP capabilities ai.redact@1, ai.extract@1, event.emit@1
- resources/mappings.yaml — task->target bindings with I/O contracts
- resources/schemas/vdvc-semantic-summary.schema.json — output contract
- resources/guardrails.yaml — GDPR + EU AI Act constraints
- metadata/ownership.yaml + metadata/lifecycle.yaml
- docs/, fixtures/, tests/eval-set.json

Validates cleanly against the UAPFormat/UAPF-specification schemas.
This commit is contained in:
32
resources/guardrails.yaml
Normal file
32
resources/guardrails.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
# Non-normative supplementary file. UAPF v1.1 does NOT treat guardrails as a cornerstone artifact;
|
||||
# they live under resources/ as a host-readable policy snapshot.
|
||||
authority: dev.uapf.stewards
|
||||
version: "1.0.0"
|
||||
|
||||
privacy:
|
||||
forbidden_in_output:
|
||||
- personal_name
|
||||
- personal_id_number
|
||||
- postal_address
|
||||
- phone_number
|
||||
- email_address
|
||||
- bank_account
|
||||
- iban
|
||||
- health_record_value
|
||||
- biometric_value
|
||||
pii_handling:
|
||||
- "Detected PII MUST be listed in sensitivityControl.detectedEntityTypes as TYPE names only, never values."
|
||||
- "Set personalDataRisk according to detected types: NONE < LOW < MEDIUM < HIGH."
|
||||
|
||||
eu_ai_act:
|
||||
classification: "Annex III §5(a) and §8(a) — high-risk per Regulation 2024/1689"
|
||||
required_transparency_fields:
|
||||
- "semanticSummary.summarySource MUST be \"AI\""
|
||||
- "semanticSummary.aiConfidenceScore MUST be 0.0–1.0"
|
||||
- "semanticSummary.aiModelVersion MUST be the exact model identifier"
|
||||
human_oversight: "humanValidationStatus MUST be PENDING or REQUIRED on completion; consuming higher-level process MUST surface to a human before any consequential action."
|
||||
|
||||
accuracy:
|
||||
- "Do not fabricate fields not supported by source text."
|
||||
- "Set aiConfidenceScore below 0.3 when classification is uncertain."
|
||||
- "If document is unreadable or too short, set humanValidationStatus to REQUIRED."
|
||||
54
resources/mappings.yaml
Normal file
54
resources/mappings.yaml
Normal file
@@ -0,0 +1,54 @@
|
||||
kind: uapf.resources.mapping
|
||||
|
||||
targets:
|
||||
- id: agent.semantic-extractor
|
||||
type: ai_agent
|
||||
name: Semantic Extraction AI Agent
|
||||
description: |
|
||||
Host-provided AI agent that fulfils ai.redact@1, ai.extract@1, and
|
||||
event.emit@1 for this process. Implementation is the host's choice
|
||||
(Claude, GPT, on-prem LLM, etc.); this package supplies the BPMN
|
||||
flow, the output schema, and the guardrails.
|
||||
capabilities:
|
||||
- capability.ai.redact
|
||||
- capability.ai.extract
|
||||
- capability.event.emit
|
||||
|
||||
bindings:
|
||||
- source: { type: bpmn.serviceTask, ref: Task_RedactPii }
|
||||
targetId: agent.semantic-extractor
|
||||
mode: autonomous
|
||||
contract:
|
||||
input:
|
||||
- { name: text, type: string, required: true }
|
||||
- { name: categories, type: array, required: false, description: "Optional PII categories; defaults to host policy." }
|
||||
output:
|
||||
- { name: redactedText, type: string }
|
||||
- { name: detections, type: array }
|
||||
timeout: "10s"
|
||||
requiredCapabilities: [capability.ai.redact]
|
||||
|
||||
- source: { type: bpmn.serviceTask, ref: Task_ExtractSemantics }
|
||||
targetId: agent.semantic-extractor
|
||||
mode: autonomous
|
||||
contract:
|
||||
input:
|
||||
- { name: text, type: string, required: true, description: "Redacted text from previous task." }
|
||||
- { name: schema, type: object, required: true, description: "VDVC v1.1 output schema. Reference: resources/schemas/vdvc-semantic-summary.schema.json" }
|
||||
output:
|
||||
- { name: extracted, type: object, description: "Validates against resources/schemas/vdvc-semantic-summary.schema.json" }
|
||||
- { name: confidence, type: number }
|
||||
- { name: modelUsed, type: string }
|
||||
timeout: "30s"
|
||||
retries: { maxAttempts: 2, backoffMs: 2000 }
|
||||
requiredCapabilities: [capability.ai.extract]
|
||||
|
||||
- source: { type: bpmn.serviceTask, ref: Task_EmitResultEvent }
|
||||
targetId: agent.semantic-extractor
|
||||
mode: autonomous
|
||||
contract:
|
||||
input:
|
||||
- { name: eventType, type: string, required: true }
|
||||
- { name: payload, type: object, required: true }
|
||||
timeout: "5s"
|
||||
requiredCapabilities: [capability.event.emit]
|
||||
49
resources/schemas/vdvc-semantic-summary.schema.json
Normal file
49
resources/schemas/vdvc-semantic-summary.schema.json
Normal file
@@ -0,0 +1,49 @@
|
||||
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://uapf.dev/schemas/vdvc/semantic-summary-v1.1.json",
  "title": "VDVC Semantic Summary v1.1",
  "description": "Output contract for ai.extract@1 when invoked by Process_SemanticDocumentAnalysis. The host's AI agent MUST produce output validating against this schema.",
  "type": "object",
  "required": ["semanticSummary", "sensitivityControl"],
  "properties": {
    "semanticSummary": {
      "type": "object",
      "description": "AI-generated semantic description of the analysed document, including the transparency fields required by resources/guardrails.yaml.",
      "required": [
        "primaryTopic",
        "summary",
        "summarySource",
        "aiConfidenceScore",
        "aiModelVersion",
        "humanValidationStatus"
      ],
      "properties": {
        "primaryTopic": {
          "type": "string",
          "maxLength": 200
        },
        "subTopics": {
          "type": "array",
          "items": { "type": "string" }
        },
        "summary": {
          "type": "string",
          "maxLength": 4000
        },
        "documentPurpose": {
          "type": "string",
          "maxLength": 200
        },
        "requestedAction": {
          "type": "string",
          "maxLength": 200
        },
        "involvedPartyTypes": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Party TYPES only, never names."
        },
        "geographicScope": {
          "type": "string"
        },
        "sectorTags": {
          "type": "array",
          "items": { "type": "string" }
        },
        "legalDomain": {
          "type": "string"
        },
        "estimatedRiskLevel": {
          "type": "string",
          "enum": ["LOW", "MEDIUM", "HIGH", "CRITICAL"]
        },
        "urgencyLevel": {
          "type": "string",
          "enum": ["LOW", "NORMAL", "HIGH", "URGENT"]
        },
        "keywords": {
          "type": "array",
          "maxItems": 20,
          "items": { "type": "string" },
          "description": "At most 20 keywords."
        },
        "detectedLanguage": {
          "type": "string",
          "pattern": "^[a-z]{2}$",
          "description": "Two-letter lowercase language code."
        },
        "summarySource": {
          "type": "string",
          "const": "AI",
          "description": "Transparency marker; always \"AI\" for output produced under this contract."
        },
        "aiConfidenceScore": {
          "type": "number",
          "minimum": 0,
          "maximum": 1,
          "description": "Confidence in the range 0.0-1.0."
        },
        "aiModelVersion": {
          "type": "string",
          "description": "Exact identifier of the model that produced the summary."
        },
        "humanValidationStatus": {
          "type": "string",
          "enum": ["PENDING", "REQUIRED", "VALIDATED", "REJECTED"],
          "description": "Human review state. resources/guardrails.yaml requires PENDING or REQUIRED when this process completes."
        },
        "mentions_child": {
          "type": "boolean"
        },
        "ongoing_harm": {
          "type": "boolean"
        },
        "vulnerable_group": {
          "type": "boolean"
        },
        "criminal_indication": {
          "type": "boolean"
        }
      }
    },
    "sensitivityControl": {
      "type": "object",
      "description": "Privacy and access-control classification of the analysed document.",
      "required": ["personalDataRisk", "allowCentralization", "redactionLevel"],
      "properties": {
        "personalDataRisk": {
          "type": "string",
          "enum": ["NONE", "LOW", "MEDIUM", "HIGH"],
          "description": "Set according to the detected PII types; ordered NONE < LOW < MEDIUM < HIGH."
        },
        "allowCentralization": {
          "type": "boolean"
        },
        "redactionLevel": {
          "type": "string",
          "enum": ["NONE", "PARTIAL", "FULL"]
        },
        "accessRestrictionBasis": {
          "type": "string"
        },
        "classifiedInformation": {
          "type": "boolean"
        },
        "detectedEntityTypes": {
          "type": "array",
          "items": { "type": "string" },
          "description": "Detected PII entity TYPE names only, never the detected values."
        }
      }
    }
  }
}
|
||||
Reference in New Issue
Block a user