mirror of
https://github.com/Comfy-Org/ComfyUI_frontend.git
synced 2026-03-01 19:20:10 +00:00
[feat] Add survey response normalization system
Implement smart categorization to normalize free-text survey responses into standardized categories for better analytics breakdowns. Key features: - Industry normalization: 16 major categories based on ~9,000 user analysis - Use case normalization: 10 common patterns for workflow purposes - Dual storage: normalized + raw values preserved - Migration utility: script for cleaning existing Mixpanel data - Pattern matching: regex-based categorization with fallback handling Addresses proliferation of one-off categories that make Mixpanel breakdowns difficult to analyze. Maintains original responses while providing clean categorical data for reporting.
This commit is contained in:
234
scripts/survey-data-migration.ts
Normal file
234
scripts/survey-data-migration.ts
Normal file
@@ -0,0 +1,234 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Survey Data Migration Script
|
||||
*
|
||||
* One-time utility to normalize existing Mixpanel user properties
|
||||
* for industry and use case fields. This addresses the proliferation
|
||||
* of one-off categories that make analytics difficult.
|
||||
*
|
||||
* Usage: pnpm ts-node scripts/survey-data-migration.ts
|
||||
*
|
||||
* IMPORTANT: This script requires Mixpanel Data Management API access
|
||||
* and should be run with appropriate credentials in production.
|
||||
*/
|
||||
|
||||
/* eslint-disable no-console */
|
||||
|
||||
import {
|
||||
normalizeIndustry,
|
||||
normalizeUseCase
|
||||
} from '../src/platform/telemetry/utils/surveyNormalization'
|
||||
|
||||
/**
 * Minimal shape of a Mixpanel user record as consumed by this script.
 */
interface MixpanelUser {
  /** Identifier printed when reporting the pending update for this user. */
  $distinct_id: string
  /** User properties; this script only reads `industry` and `useCase`. */
  $properties: {
    /** Free-text industry survey answer, if the user gave one. */
    industry?: string
    /** Free-text use-case survey answer, if the user gave one. */
    useCase?: string
    // Any other property is never inspected here, so it stays opaque
    // instead of opting out of type checking with `any`.
    [key: string]: unknown
  }
}
|
||||
|
||||
/**
 * Summary returned by a migration (dry) run.
 */
interface MigrationStats {
  /** Number of user records handed to the run. */
  totalUsers: number
  /** Users whose `industry` value differs from its normalized form. */
  industryNormalized: number
  /** Users whose `useCase` value differs from its normalized form. */
  useCaseNormalized: number
  /** Distinct raw industry answers that normalized to `Uncategorized: …`. */
  uncategorizedIndustries: Set<string>
  /** Distinct raw use-case answers that normalized to `Uncategorized: …`. */
  uncategorizedUseCases: Set<string>
}
|
||||
|
||||
/**
 * Simulate the data migration process.
 *
 * Nothing is written anywhere: for every user whose `industry` or `useCase`
 * differs from its normalized form, the pending property update is printed
 * and counted. In production, this would integrate with the Mixpanel Data
 * Management API.
 *
 * @param users - User records to inspect.
 * @returns Counters for the run plus the distinct raw answers that ended up
 *   in the `Uncategorized:` bucket.
 */
function simulateMigration(users: MixpanelUser[]): MigrationStats {
  const stats: MigrationStats = {
    totalUsers: users.length,
    industryNormalized: 0,
    useCaseNormalized: 0,
    uncategorizedIndustries: new Set<string>(),
    uncategorizedUseCases: new Set<string>()
  }

  for (const user of users) {
    const { industry, useCase } = user.$properties
    // Properties that would be written back for this user; empty = no-op.
    const updates: Record<string, string> = {}

    // Process industry normalization
    if (industry) {
      const normalized = normalizeIndustry(industry)

      // Values that already equal their category need no update.
      if (normalized !== industry) {
        updates.industry_normalized = normalized
        updates.industry_raw = industry
        stats.industryNormalized++

        if (normalized.startsWith('Uncategorized:')) {
          stats.uncategorizedIndustries.add(industry)
        }
      }
    }

    // Process use case normalization
    if (useCase) {
      const normalized = normalizeUseCase(useCase)

      if (normalized !== useCase) {
        updates.useCase_normalized = normalized
        updates.useCase_raw = useCase
        stats.useCaseNormalized++

        if (normalized.startsWith('Uncategorized:')) {
          stats.uncategorizedUseCases.add(useCase)
        }
      }
    }

    // In production, this would make API calls to update user properties
    if (Object.keys(updates).length > 0) {
      console.log(`Would update user ${user.$distinct_id}:`, updates)
    }
  }

  return stats
}
|
||||
|
||||
/**
 * Generate sample data for testing normalization rules.
 *
 * @returns Eight synthetic users with ids `user1` … `user8`, each carrying
 *   one free-text `industry` and one free-text `useCase` answer.
 */
function generateSampleData(): MixpanelUser[] {
  // [industry, useCase] per sample user; ids are derived from the position.
  const samples: ReadonlyArray<readonly [string, string]> = [
    ['Film and television production', 'Creating concept art for movies'],
    ['Marketing & Social Media', 'YouTube thumbnail generation'],
    ['Software Development', 'Product mockup creation'],
    ['Indie Game Studio', 'Game asset generation'],
    ['Architecture firm', 'Building visualization'],
    ['Custom Jewelry Design', 'Product photography'],
    ['Medical Research', 'Scientific visualization'],
    ['Unknown Creative Field', 'Personal art projects']
  ]

  return samples.map(([industry, useCase], index) => ({
    $distinct_id: `user${index + 1}`,
    $properties: { industry, useCase }
  }))
}
|
||||
|
||||
/**
 * Print the steps a production migration would take.
 *
 * This is a placeholder: it performs no API calls. A production
 * implementation would use the Mixpanel Data Management API; the commented
 * sketch in the body illustrates the intended structure and is not an actual
 * implementation.
 */
async function productionMigration(): Promise<void> {
  const steps = [
    '🔧 Production Migration Process:',
    '1. Export user profiles via Mixpanel Data Management API',
    '2. Apply normalization to industry and useCase fields',
    '3. Create new properties: industry_normalized, useCase_normalized',
    '4. Preserve original values as: industry_raw, useCase_raw',
    '5. Batch update user profiles',
    '6. Generate uncategorized response report for review'
  ]

  for (const step of steps) {
    console.log(step)
  }

  /*
  Example API calls:

  // 1. Export users
  const users = await mixpanel.people.query({
    where: 'properties["industry"] != null or properties["useCase"] != null'
  })

  // 2. Process and update
  for (const user of users) {
    const normalizedData = normalizeSurveyResponses(user.properties)
    await mixpanel.people.set(user.distinct_id, normalizedData)
  }
  */
}
|
||||
|
||||
/**
 * Main migration runner.
 *
 * Runs the simulation against the built-in sample data, prints the resulting
 * counters and any uncategorized answers, then prints the outline of the
 * production process.
 */
function main() {
  console.log('📊 Survey Data Migration Utility')
  console.log('================================\n')

  // Run simulation with sample data
  console.log('🧪 Running simulation with sample data...\n')
  const stats = simulateMigration(generateSampleData())

  // Display results
  console.log('📈 Migration Results:')
  console.log(`Total users processed: ${stats.totalUsers}`)
  console.log(`Industry fields normalized: ${stats.industryNormalized}`)
  console.log(`Use case fields normalized: ${stats.useCaseNormalized}`)

  if (stats.uncategorizedIndustries.size > 0) {
    console.log('\n❓ Uncategorized Industries (need review):')
    for (const industry of stats.uncategorizedIndustries) {
      console.log(` • ${industry}`)
    }
  }

  if (stats.uncategorizedUseCases.size > 0) {
    console.log('\n❓ Uncategorized Use Cases (need review):')
    for (const useCase of stats.uncategorizedUseCases) {
      console.log(` • ${useCase}`)
    }
  }

  console.log('\n' + '='.repeat(50))
  // Fire-and-forget: the returned promise is intentionally not awaited.
  void productionMigration()
}
|
||||
|
||||
// Run if called directly (not when imported as a module)
if (require.main === module) {
  main()
}

export { simulateMigration, generateSampleData }
// MigrationStats is an interface, so it is exported as a type-only symbol.
export type { MigrationStats }
|
||||
@@ -7,6 +7,7 @@ import { app } from '@/scripts/app'
|
||||
import { useNodeDefStore } from '@/stores/nodeDefStore'
|
||||
import { NodeSourceType } from '@/types/nodeSource'
|
||||
import { reduceAllNodes } from '@/utils/graphTraversalUtil'
|
||||
import { normalizeSurveyResponses } from '../../utils/surveyNormalization'
|
||||
|
||||
import type {
|
||||
AuthMetadata,
|
||||
@@ -178,7 +179,21 @@ export class MixpanelTelemetryProvider implements TelemetryProvider {
|
||||
? TelemetryEvents.USER_SURVEY_OPENED
|
||||
: TelemetryEvents.USER_SURVEY_SUBMITTED
|
||||
|
||||
this.trackEvent(eventName, responses)
|
||||
// Apply normalization to survey responses
|
||||
const normalizedResponses = responses
|
||||
? normalizeSurveyResponses(responses)
|
||||
: undefined
|
||||
|
||||
this.trackEvent(eventName, normalizedResponses)
|
||||
|
||||
// If this is a survey submission, also set user properties with normalized data
|
||||
if (stage === 'submitted' && normalizedResponses && this.mixpanel) {
|
||||
try {
|
||||
this.mixpanel.people.set(normalizedResponses)
|
||||
} catch (error) {
|
||||
console.error('Failed to set survey user properties:', error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trackEmailVerification(stage: 'opened' | 'requested' | 'completed'): void {
|
||||
|
||||
297
src/platform/telemetry/utils/surveyNormalization.ts
Normal file
297
src/platform/telemetry/utils/surveyNormalization.ts
Normal file
@@ -0,0 +1,297 @@
|
||||
/**
|
||||
* Survey Response Normalization Utilities
|
||||
*
|
||||
* Smart categorization system to normalize free-text survey responses
|
||||
* into standardized categories for better analytics breakdowns.
|
||||
*/
|
||||
|
||||
/**
 * Ordered industry rules. The first pattern that matches the lower-cased,
 * trimmed answer decides the category, so earlier rows take precedence.
 *
 * Short tokens (`it`, `ai`, `vr`, `ar`, `vj`, `art`, `cad`) are anchored with
 * `\b`: as bare substrings they also match inside unrelated words
 * ("architecture", "university" and "nonprofit" contain "it", "retail"
 * contains "ai", "charity" contains "ar", "startup" contains "art").
 */
const INDUSTRY_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  // Film / TV / Animation (~2,885 users)
  [
    /film|tv|animation|story|anime|video|cinematography|visual effects|vfx|movie|cinema/,
    'Film / TV / Animation'
  ],
  // Marketing / Advertising / Social Media (~1,340 users)
  [
    /marketing|advertising|youtube|tiktok|social media|content creation|influencer|brand|promotion/,
    'Marketing / Advertising / Social Media'
  ],
  // Software / IT / AI (~1,100 users)
  [
    /software|\bit\b|\bai\b|developer|consulting|engineering|tech|programmer|data science|machine learning/,
    'Software / IT / AI'
  ],
  // Product & Industrial Design (~1,050 users)
  [
    /product.?design|industrial|manufacturing|3d rendering|product visualization|mechanical|automotive/,
    'Product & Industrial Design'
  ],
  // Fine Art / Contemporary Art (~780 users)
  // The anchored alternative covers art, arts, artist(s), artwork(s), artistic.
  [
    /fine.?art|\bart(?:s|ists?|works?|istic)?\b|illustration|contemporary|painting|drawing|sculpture|gallery/,
    'Fine Art / Contemporary Art'
  ],
  // Education / Research (~640 users)
  [
    /education|student|teacher|research|learning|university|school|academic|professor/,
    'Education / Research'
  ],
  // Architecture / Engineering / Construction (~420 users)
  // `cad\b` still accepts "autocad" but no longer "decade" or "arcade".
  [
    /architecture|construction|engineering|civil|cad\b|building|structural|landscape/,
    'Architecture / Engineering / Construction'
  ],
  // Gaming / Interactive Media (~410 users)
  [
    /gaming|game dev|roblox|interactive|virtual world|\bvr\b|\bar\b|metaverse|simulation/,
    'Gaming / Interactive Media'
  ],
  // Photography / Videography (~70 users)
  [
    /photography|photo|videography|camera|image|portrait|wedding|commercial photo/,
    'Photography / Videography'
  ],
  // Fashion / Beauty / Retail (~25 users)
  [
    /fashion|beauty|jewelry|retail|style|clothing|cosmetics|makeup/,
    'Fashion / Beauty / Retail'
  ],
  // Music / Performing Arts (~25 users)
  [
    /music|\bvj\b|dance|projection mapping|audio visual|concert|performance|theater/,
    'Music / Performing Arts'
  ],
  // Healthcare / Medical / Life Science (~30 users)
  [
    /healthcare|medical|doctor|biotech|life science|pharmaceutical|clinical|hospital/,
    'Healthcare / Medical / Life Science'
  ],
  // E-commerce / Print-on-Demand / Business (~15 users)
  [
    /ecommerce|print on demand|shop|business|commercial|startup|entrepreneur|sales/,
    'E-commerce / Print-on-Demand / Business'
  ],
  // Nonprofit / Government / Public Sector (~15 users)
  [
    /501c3|ngo|government|public service|policy|nonprofit|charity|civic/,
    'Nonprofit / Government / Public Sector'
  ],
  // Adult / NSFW (~10 users)
  [/nsfw|adult|erotic|explicit|xxx|porn/, 'Adult / NSFW'],
  // Other / Undefined - catch common undefined responses
  [
    /other|none|undefined|unknown|n\/a|not applicable|^-$|^$|misc/,
    'Other / Undefined'
  ]
]

/**
 * Normalize industry responses into standardized categories.
 * Based on analysis of ~9,000 existing user responses.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param rawIndustry - Free-text industry answer.
 * @returns The first matching category name; `'Other / Undefined'` for
 *   empty or non-string input; otherwise `Uncategorized: <rawIndustry>`,
 *   which keeps the original text available for later review.
 */
export function normalizeIndustry(rawIndustry: string): string {
  // Runtime guard: the value may come from untyped survey data.
  if (!rawIndustry || typeof rawIndustry !== 'string') {
    return 'Other / Undefined'
  }

  const industry = rawIndustry.toLowerCase().trim()

  for (const [pattern, category] of INDUSTRY_RULES) {
    if (pattern.test(industry)) {
      return category
    }
  }

  // Uncategorized - preserve original but prefix for analysis
  return `Uncategorized: ${rawIndustry}`
}
|
||||
|
||||
/**
 * Ordered use-case rules. The first pattern that matches the lower-cased,
 * trimmed answer decides the category, so earlier rows take precedence.
 *
 * The short tokens `art`, `vr` and `ar` are anchored with `\b`: as bare
 * substrings they also match inside unrelated words ("research" and
 * "learning" contain "ar", "started" and "startup" contain "art"), which
 * made the later Education and Research rules unreachable for those answers.
 */
const USE_CASE_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  // Content Creation & Marketing
  [
    /content creation|social media|marketing|advertising|youtube|tiktok|instagram|thumbnails/,
    'Content Creation & Marketing'
  ],
  // Art & Illustration
  // The anchored alternative covers art, arts, artist(s), artwork(s), artistic.
  [
    /\bart(?:s|ists?|works?|istic)?\b|illustration|drawing|painting|concept art|character design|digital art/,
    'Art & Illustration'
  ],
  // Product Visualization & Design
  [
    /product|visualization|design|prototype|mockup|3d rendering|industrial design/,
    'Product Visualization & Design'
  ],
  // Film & Video Production
  [
    /film|video|movie|animation|vfx|visual effects|storyboard|cinematography/,
    'Film & Video Production'
  ],
  // Gaming & Interactive Media
  [
    /game|gaming|interactive|\bvr\b|\bar\b|virtual|simulation|metaverse/,
    'Gaming & Interactive Media'
  ],
  // Architecture & Construction
  [
    /architecture|building|construction|interior design|landscape|real estate/,
    'Architecture & Construction'
  ],
  // Education & Training
  [
    /education|training|learning|teaching|tutorial|course|academic/,
    'Education & Training'
  ],
  // Research & Development
  [
    /research|development|experiment|prototype|testing|analysis|study/,
    'Research & Development'
  ],
  // Personal & Hobby
  [
    /personal|hobby|fun|experiment|learning|curiosity|explore/,
    'Personal & Hobby'
  ],
  // Photography & Image Processing
  [
    /photography|photo|image|portrait|editing|enhancement|restoration/,
    'Photography & Image Processing'
  ],
  // Other / Undefined
  [
    /other|none|undefined|unknown|n\/a|not applicable|^-$|^$|misc/,
    'Other / Undefined'
  ]
]

/**
 * Normalize use case responses into standardized categories.
 * Based on common patterns in user responses.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param rawUseCase - Free-text use-case answer.
 * @returns The first matching category name; `'Other / Undefined'` for
 *   empty or non-string input; otherwise `Uncategorized: <rawUseCase>`,
 *   which keeps the original text available for later review.
 */
export function normalizeUseCase(rawUseCase: string): string {
  // Runtime guard: the value may come from untyped survey data.
  if (!rawUseCase || typeof rawUseCase !== 'string') {
    return 'Other / Undefined'
  }

  const useCase = rawUseCase.toLowerCase().trim()

  for (const [pattern, category] of USE_CASE_RULES) {
    if (pattern.test(useCase)) {
      return category
    }
  }

  // Uncategorized - preserve original but prefix for analysis
  return `Uncategorized: ${rawUseCase}`
}
|
||||
|
||||
/**
 * Apply normalization to survey responses.
 *
 * Returns a shallow copy of `responses`; the input object is not modified.
 * For each of `industry` and `useCase` that holds a non-empty value, two
 * fields are added to the copy: `<field>_normalized` (the category) and
 * `<field>_raw` (the original answer). All other fields are passed through
 * unchanged.
 *
 * @param responses - Survey answers keyed by field name.
 * @returns The copy carrying the additional normalized/raw fields.
 */
export function normalizeSurveyResponses(responses: {
  industry?: string
  useCase?: string
  // NOTE(review): kept as `any` so the inferred return type is unchanged for
  // existing callers; tighten once the callers' expected types are confirmed.
  [key: string]: any
}) {
  const { industry, useCase } = responses
  const normalized = { ...responses }

  // Normalize industry
  if (industry) {
    normalized.industry_normalized = normalizeIndustry(industry)
    normalized.industry_raw = industry
  }

  // Normalize use case
  if (useCase) {
    normalized.useCase_normalized = normalizeUseCase(useCase)
    normalized.useCase_raw = useCase
  }

  return normalized
}
|
||||
Reference in New Issue
Block a user