mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-02-09 01:20:09 +00:00
[refactor] Upgrade survey normalization to use fuzzy search
Replace regex pattern matching with Fuse.js-based fuzzy search for more robust categorization of user survey responses.

Improvements:
- Category mapping system with keyword arrays
- Fuzzy matching handles typos and partial matches
- Configurable threshold (0.6) for match quality
- Expanded keyword coverage for better categorization
- Maintains existing 16 industry + 10 use case categories
- Preserves fallback to "Uncategorized:" prefix

Examples now handled:
- "animtion" → "Film / TV / Animation" (typo correction)
- "game dev" → "Gaming / Interactive Media" (partial match)
- "social content" → "Marketing / Advertising / Social Media" (similarity)
This commit is contained in:
@@ -3,11 +3,454 @@
|
||||
*
|
||||
* Smart categorization system to normalize free-text survey responses
|
||||
* into standardized categories for better analytics breakdowns.
|
||||
* Uses Fuse.js for fuzzy matching against category keywords.
|
||||
*/
|
||||
|
||||
import Fuse from 'fuse.js'
|
||||
|
||||
/**
 * One standardized survey category together with the keywords that are
 * fuzzy-matched against a free-text response to select it.
 */
interface CategoryMapping {
  /** Label returned to callers when this category is the best match. */
  name: string
  /** Lower-case terms searched by Fuse.js (indexed via the `keywords` key). */
  keywords: string[]
  /** Respondent count from the original analysis; kept for reference only. */
  userCount?: number
}
|
||||
|
||||
/**
 * Industry category mappings, derived from an analysis of ~9,000 existing
 * user responses.
 *
 * Each entry pairs the standardized label with the keywords that are
 * fuzzy-matched against a respondent's free-text industry. `userCount` is the
 * approximate number of respondents per category in that analysis and is kept
 * for reference only.
 *
 * NOTE(review): several keywords appear under more than one category
 * ('engineering', 'cad', 'portrait', 'ecommerce', 'retail', 'startup');
 * confirm the resulting match for those inputs is the intended one.
 */
const INDUSTRY_CATEGORIES: CategoryMapping[] = [
  {
    name: 'Film / TV / Animation',
    userCount: 2885,
    keywords: [
      'film',
      'tv',
      'television',
      'animation',
      'story',
      'anime',
      'video',
      'cinematography',
      'visual effects',
      'vfx',
      'movie',
      'cinema',
      'documentary',
      'broadcast',
      'streaming',
      'production',
      'director',
      'filmmaker',
      'post-production',
      'editing'
    ]
  },
  {
    name: 'Marketing / Advertising / Social Media',
    userCount: 1340,
    keywords: [
      'marketing',
      'advertising',
      'youtube',
      'tiktok',
      'social media',
      'content creation',
      'influencer',
      'brand',
      'promotion',
      'digital marketing',
      'seo',
      'campaigns',
      'copywriting',
      'growth',
      'engagement'
    ]
  },
  {
    name: 'Software / IT / AI',
    userCount: 1100,
    keywords: [
      'software',
      'it',
      'ai',
      'developer',
      'consulting',
      'engineering',
      'tech',
      'programmer',
      'data science',
      'machine learning',
      'coding',
      'programming',
      'web development',
      'app development',
      'saas',
      'startup'
    ]
  },
  {
    name: 'Product & Industrial Design',
    userCount: 1050,
    keywords: [
      'product design',
      'industrial',
      'manufacturing',
      '3d rendering',
      'product visualization',
      'mechanical',
      'automotive',
      'cad',
      'prototype',
      'design engineering',
      'invention'
    ]
  },
  {
    name: 'Fine Art / Contemporary Art',
    userCount: 780,
    keywords: [
      'fine art',
      'art',
      'illustration',
      'contemporary',
      'artist',
      'painting',
      'drawing',
      'sculpture',
      'gallery',
      'canvas',
      'digital art',
      'mixed media',
      'abstract',
      'portrait'
    ]
  },
  {
    name: 'Education / Research',
    userCount: 640,
    keywords: [
      'education',
      'student',
      'teacher',
      'research',
      'learning',
      'university',
      'school',
      'academic',
      'professor',
      'curriculum',
      'training',
      'instruction',
      'pedagogy'
    ]
  },
  {
    name: 'Architecture / Engineering / Construction',
    userCount: 420,
    keywords: [
      'architecture',
      'construction',
      'engineering',
      'civil',
      'cad',
      'building',
      'structural',
      'landscape',
      'interior design',
      'real estate',
      'planning',
      'blueprints'
    ]
  },
  {
    name: 'Gaming / Interactive Media',
    userCount: 410,
    keywords: [
      'gaming',
      'game dev',
      'game development',
      'roblox',
      'interactive',
      'virtual world',
      'vr',
      'ar',
      'metaverse',
      'simulation',
      'unity',
      'unreal',
      'indie games'
    ]
  },
  {
    name: 'Photography / Videography',
    userCount: 70,
    keywords: [
      'photography',
      'photo',
      'videography',
      'camera',
      'image',
      'portrait',
      'wedding',
      'commercial photo',
      'stock photography',
      'photojournalism',
      'event photography'
    ]
  },
  {
    name: 'Fashion / Beauty / Retail',
    userCount: 25,
    keywords: [
      'fashion',
      'beauty',
      'jewelry',
      'retail',
      'style',
      'clothing',
      'cosmetics',
      'makeup',
      'accessories',
      'boutique',
      'ecommerce'
    ]
  },
  {
    name: 'Music / Performing Arts',
    userCount: 25,
    keywords: [
      'music',
      'vj',
      'dance',
      'projection mapping',
      'audio visual',
      'concert',
      'performance',
      'theater',
      'stage',
      'live events'
    ]
  },
  {
    name: 'Healthcare / Medical / Life Science',
    userCount: 30,
    keywords: [
      'healthcare',
      'medical',
      'doctor',
      'biotech',
      'life science',
      'pharmaceutical',
      'clinical',
      'hospital',
      'medicine',
      'health'
    ]
  },
  {
    name: 'E-commerce / Print-on-Demand / Business',
    userCount: 15,
    keywords: [
      'ecommerce',
      'print on demand',
      'shop',
      'business',
      'commercial',
      'startup',
      'entrepreneur',
      'sales',
      'retail',
      'online store'
    ]
  },
  {
    name: 'Nonprofit / Government / Public Sector',
    userCount: 15,
    keywords: [
      '501c3',
      'ngo',
      'government',
      'public service',
      'policy',
      'nonprofit',
      'charity',
      'civic',
      'community',
      'social impact'
    ]
  },
  {
    name: 'Adult / NSFW',
    userCount: 10,
    keywords: ['nsfw', 'adult', 'erotic', 'explicit', 'xxx', 'porn']
  }
]
|
||||
|
||||
/**
 * Use case category mappings based on common patterns in user responses.
 *
 * Each entry pairs the standardized label with the keywords that are
 * fuzzy-matched against a respondent's free-text use case. Unlike the
 * industry mappings, no `userCount` reference figures are recorded here.
 */
const USE_CASE_CATEGORIES: CategoryMapping[] = [
  {
    name: 'Content Creation & Marketing',
    keywords: [
      'content creation',
      'social media',
      'marketing',
      'advertising',
      'youtube',
      'tiktok',
      'instagram',
      'thumbnails',
      'posts',
      'campaigns',
      'brand content'
    ]
  },
  {
    name: 'Art & Illustration',
    keywords: [
      'art',
      'illustration',
      'drawing',
      'painting',
      'concept art',
      'character design',
      'digital art',
      'fantasy art',
      'portraits'
    ]
  },
  {
    name: 'Product Visualization & Design',
    keywords: [
      'product',
      'visualization',
      'design',
      'prototype',
      'mockup',
      '3d rendering',
      'industrial design',
      'product photos'
    ]
  },
  {
    name: 'Film & Video Production',
    keywords: [
      'film',
      'video',
      'movie',
      'animation',
      'vfx',
      'visual effects',
      'storyboard',
      'cinematography',
      'post production'
    ]
  },
  {
    name: 'Gaming & Interactive Media',
    keywords: [
      'game',
      'gaming',
      'interactive',
      'vr',
      'ar',
      'virtual',
      'simulation',
      'metaverse',
      'game assets',
      'textures'
    ]
  },
  {
    name: 'Architecture & Construction',
    keywords: [
      'architecture',
      'building',
      'construction',
      'interior design',
      'landscape',
      'real estate',
      'floor plans',
      'renderings'
    ]
  },
  {
    name: 'Education & Training',
    keywords: [
      'education',
      'training',
      'learning',
      'teaching',
      'tutorial',
      'course',
      'academic',
      'instructional',
      'workshops'
    ]
  },
  {
    name: 'Research & Development',
    keywords: [
      'research',
      'development',
      'experiment',
      'prototype',
      'testing',
      'analysis',
      'study',
      'innovation',
      'r&d'
    ]
  },
  {
    name: 'Personal & Hobby',
    keywords: [
      'personal',
      'hobby',
      'fun',
      'experiment',
      'learning',
      'curiosity',
      'explore',
      'creative',
      'side project'
    ]
  },
  {
    name: 'Photography & Image Processing',
    keywords: [
      'photography',
      'photo',
      'image',
      'portrait',
      'editing',
      'enhancement',
      'restoration',
      'photo manipulation'
    ]
  }
]
|
||||
|
||||
/**
 * Fuzzy search configuration shared by the industry and use case matchers.
 *
 * NOTE(review): the commit description mentions a threshold of 0.6 and the
 * normalize functions compare the best result's score against 0.6, while the
 * threshold configured here is 0.4 — confirm which value is intended.
 */
const FUSE_OPTIONS = {
  // Search the keyword list of each category mapping.
  keys: ['keywords'],
  // Lower = more strict matching.
  threshold: 0.4,
  // The normalize functions read `score` from the best result.
  includeScore: true,
  // NOTE(review): match details are not read by the visible callers —
  // confirm this is still needed.
  includeMatches: true,
  // Do not penalize a match for where it occurs inside a keyword.
  ignoreLocation: true,
  // Keep scanning after a perfect match has already been found.
  findAllMatches: true
}
|
||||
|
||||
/**
 * Fuse instances for category matching.
 *
 * Built once at module load, one per category list, both sharing
 * FUSE_OPTIONS, so each normalize call only runs a search.
 */
const industryFuse = new Fuse(INDUSTRY_CATEGORIES, FUSE_OPTIONS)
const useCaseFuse = new Fuse(USE_CASE_CATEGORIES, FUSE_OPTIONS)
|
||||
|
||||
/**
|
||||
* Normalize industry responses using fuzzy search
|
||||
*/
|
||||
export function normalizeIndustry(rawIndustry: string): string {
|
||||
if (!rawIndustry || typeof rawIndustry !== 'string') {
|
||||
@@ -16,153 +459,27 @@ export function normalizeIndustry(rawIndustry: string): string {
|
||||
|
||||
const industry = rawIndustry.toLowerCase().trim()
|
||||
|
||||
// Film / TV / Animation (~2,885 users)
|
||||
// Handle common undefined responses
|
||||
if (
|
||||
industry.match(
|
||||
/film|tv|animation|story|anime|video|cinematography|visual effects|vfx|movie|cinema/
|
||||
)
|
||||
) {
|
||||
return 'Film / TV / Animation'
|
||||
}
|
||||
|
||||
// Marketing / Advertising / Social Media (~1,340 users)
|
||||
if (
|
||||
industry.match(
|
||||
/marketing|advertising|youtube|tiktok|social media|content creation|influencer|brand|promotion/
|
||||
)
|
||||
) {
|
||||
return 'Marketing / Advertising / Social Media'
|
||||
}
|
||||
|
||||
// Software / IT / AI (~1,100 users)
|
||||
if (
|
||||
industry.match(
|
||||
/software|it|ai|developer|consulting|engineering|tech|programmer|data science|machine learning/
|
||||
)
|
||||
) {
|
||||
return 'Software / IT / AI'
|
||||
}
|
||||
|
||||
// Product & Industrial Design (~1,050 users)
|
||||
if (
|
||||
industry.match(
|
||||
/product.?design|industrial|manufacturing|3d rendering|product visualization|mechanical|automotive/
|
||||
)
|
||||
) {
|
||||
return 'Product & Industrial Design'
|
||||
}
|
||||
|
||||
// Fine Art / Contemporary Art (~780 users)
|
||||
if (
|
||||
industry.match(
|
||||
/fine.?art|art|illustration|contemporary|artist|painting|drawing|sculpture|gallery/
|
||||
)
|
||||
) {
|
||||
return 'Fine Art / Contemporary Art'
|
||||
}
|
||||
|
||||
// Education / Research (~640 users)
|
||||
if (
|
||||
industry.match(
|
||||
/education|student|teacher|research|learning|university|school|academic|professor/
|
||||
)
|
||||
) {
|
||||
return 'Education / Research'
|
||||
}
|
||||
|
||||
// Architecture / Engineering / Construction (~420 users)
|
||||
if (
|
||||
industry.match(
|
||||
/architecture|construction|engineering|civil|cad|building|structural|landscape/
|
||||
)
|
||||
) {
|
||||
return 'Architecture / Engineering / Construction'
|
||||
}
|
||||
|
||||
// Gaming / Interactive Media (~410 users)
|
||||
if (
|
||||
industry.match(
|
||||
/gaming|game dev|roblox|interactive|virtual world|vr|ar|metaverse|simulation/
|
||||
)
|
||||
) {
|
||||
return 'Gaming / Interactive Media'
|
||||
}
|
||||
|
||||
// Photography / Videography (~70 users)
|
||||
if (
|
||||
industry.match(
|
||||
/photography|photo|videography|camera|image|portrait|wedding|commercial photo/
|
||||
)
|
||||
) {
|
||||
return 'Photography / Videography'
|
||||
}
|
||||
|
||||
// Fashion / Beauty / Retail (~25 users)
|
||||
if (
|
||||
industry.match(
|
||||
/fashion|beauty|jewelry|retail|style|clothing|cosmetics|makeup/
|
||||
)
|
||||
) {
|
||||
return 'Fashion / Beauty / Retail'
|
||||
}
|
||||
|
||||
// Music / Performing Arts (~25 users)
|
||||
if (
|
||||
industry.match(
|
||||
/music|vj|dance|projection mapping|audio visual|concert|performance|theater/
|
||||
)
|
||||
) {
|
||||
return 'Music / Performing Arts'
|
||||
}
|
||||
|
||||
// Healthcare / Medical / Life Science (~30 users)
|
||||
if (
|
||||
industry.match(
|
||||
/healthcare|medical|doctor|biotech|life science|pharmaceutical|clinical|hospital/
|
||||
)
|
||||
) {
|
||||
return 'Healthcare / Medical / Life Science'
|
||||
}
|
||||
|
||||
// E-commerce / Print-on-Demand / Business (~15 users)
|
||||
if (
|
||||
industry.match(
|
||||
/ecommerce|print on demand|shop|business|commercial|startup|entrepreneur|sales/
|
||||
)
|
||||
) {
|
||||
return 'E-commerce / Print-on-Demand / Business'
|
||||
}
|
||||
|
||||
// Nonprofit / Government / Public Sector (~15 users)
|
||||
if (
|
||||
industry.match(
|
||||
/501c3|ngo|government|public service|policy|nonprofit|charity|civic/
|
||||
)
|
||||
) {
|
||||
return 'Nonprofit / Government / Public Sector'
|
||||
}
|
||||
|
||||
// Adult / NSFW (~10 users)
|
||||
if (industry.match(/nsfw|adult|erotic|explicit|xxx|porn/)) {
|
||||
return 'Adult / NSFW'
|
||||
}
|
||||
|
||||
// Other / Undefined - catch common undefined responses
|
||||
if (
|
||||
industry.match(
|
||||
/other|none|undefined|unknown|n\/a|not applicable|^-$|^$|misc/
|
||||
)
|
||||
industry.match(/^(other|none|undefined|unknown|n\/a|not applicable|-|)$/)
|
||||
) {
|
||||
return 'Other / Undefined'
|
||||
}
|
||||
|
||||
// Uncategorized - preserve original but prefix for analysis
|
||||
// Fuzzy search for best category match
|
||||
const results = industryFuse.search(industry)
|
||||
|
||||
if (results.length > 0 && results[0].score! < 0.6) {
|
||||
// Good match found
|
||||
return results[0].item.name
|
||||
}
|
||||
|
||||
// No good match found - preserve original with prefix
|
||||
return `Uncategorized: ${rawIndustry}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize use case responses into standardized categories
|
||||
* Based on common patterns in user responses
|
||||
* Normalize use case responses using fuzzy search
|
||||
*/
|
||||
export function normalizeUseCase(rawUseCase: string): string {
|
||||
if (!rawUseCase || typeof rawUseCase !== 'string') {
|
||||
@@ -171,102 +488,22 @@ export function normalizeUseCase(rawUseCase: string): string {
|
||||
|
||||
const useCase = rawUseCase.toLowerCase().trim()
|
||||
|
||||
// Content Creation & Marketing
|
||||
// Handle common undefined responses
|
||||
if (
|
||||
useCase.match(
|
||||
/content creation|social media|marketing|advertising|youtube|tiktok|instagram|thumbnails/
|
||||
)
|
||||
) {
|
||||
return 'Content Creation & Marketing'
|
||||
}
|
||||
|
||||
// Art & Illustration
|
||||
if (
|
||||
useCase.match(
|
||||
/art|illustration|drawing|painting|concept art|character design|digital art/
|
||||
)
|
||||
) {
|
||||
return 'Art & Illustration'
|
||||
}
|
||||
|
||||
// Product Visualization & Design
|
||||
if (
|
||||
useCase.match(
|
||||
/product|visualization|design|prototype|mockup|3d rendering|industrial design/
|
||||
)
|
||||
) {
|
||||
return 'Product Visualization & Design'
|
||||
}
|
||||
|
||||
// Film & Video Production
|
||||
if (
|
||||
useCase.match(
|
||||
/film|video|movie|animation|vfx|visual effects|storyboard|cinematography/
|
||||
)
|
||||
) {
|
||||
return 'Film & Video Production'
|
||||
}
|
||||
|
||||
// Gaming & Interactive Media
|
||||
if (
|
||||
useCase.match(/game|gaming|interactive|vr|ar|virtual|simulation|metaverse/)
|
||||
) {
|
||||
return 'Gaming & Interactive Media'
|
||||
}
|
||||
|
||||
// Architecture & Construction
|
||||
if (
|
||||
useCase.match(
|
||||
/architecture|building|construction|interior design|landscape|real estate/
|
||||
)
|
||||
) {
|
||||
return 'Architecture & Construction'
|
||||
}
|
||||
|
||||
// Education & Training
|
||||
if (
|
||||
useCase.match(
|
||||
/education|training|learning|teaching|tutorial|course|academic/
|
||||
)
|
||||
) {
|
||||
return 'Education & Training'
|
||||
}
|
||||
|
||||
// Research & Development
|
||||
if (
|
||||
useCase.match(
|
||||
/research|development|experiment|prototype|testing|analysis|study/
|
||||
)
|
||||
) {
|
||||
return 'Research & Development'
|
||||
}
|
||||
|
||||
// Personal & Hobby
|
||||
if (
|
||||
useCase.match(/personal|hobby|fun|experiment|learning|curiosity|explore/)
|
||||
) {
|
||||
return 'Personal & Hobby'
|
||||
}
|
||||
|
||||
// Photography & Image Processing
|
||||
if (
|
||||
useCase.match(
|
||||
/photography|photo|image|portrait|editing|enhancement|restoration/
|
||||
)
|
||||
) {
|
||||
return 'Photography & Image Processing'
|
||||
}
|
||||
|
||||
// Other / Undefined
|
||||
if (
|
||||
useCase.match(
|
||||
/other|none|undefined|unknown|n\/a|not applicable|^-$|^$|misc/
|
||||
)
|
||||
useCase.match(/^(other|none|undefined|unknown|n\/a|not applicable|-|)$/)
|
||||
) {
|
||||
return 'Other / Undefined'
|
||||
}
|
||||
|
||||
// Uncategorized - preserve original but prefix for analysis
|
||||
// Fuzzy search for best category match
|
||||
const results = useCaseFuse.search(useCase)
|
||||
|
||||
if (results.length > 0 && results[0].score! < 0.6) {
|
||||
// Good match found
|
||||
return results[0].item.name
|
||||
}
|
||||
|
||||
// No good match found - preserve original with prefix
|
||||
return `Uncategorized: ${rawUseCase}`
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user