| { | |
| "title": "t-SNE Mastery: 100 MCQs", | |
| "description": "A comprehensive set of 100 multiple-choice questions on t-Distributed Stochastic Neighbor Embedding (t-SNE), covering basic theory, medium-level conceptual understanding, and hard scenario-based applications.", | |
| "questions": [ | |
| { | |
| "id": 1, | |
| "questionText": "What does t-SNE stand for?", | |
| "options": [ | |
| "t-Scaled Neighbor Estimation", | |
| "t-Distributed Stochastic Neighbor Embedding", | |
| "Tensor Stochastic Network Embedding", | |
| "Total Stochastic Neural Embedding" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE stands for t-Distributed Stochastic Neighbor Embedding, a technique for dimensionality reduction and visualization of high-dimensional data." | |
| }, | |
| { | |
| "id": 2, | |
| "questionText": "What is the primary purpose of t-SNE?", | |
| "options": [ | |
| "Normalizing data features", | |
| "Generating synthetic data", | |
| "Reducing dimensionality for visualization", | |
| "Classifying high-dimensional data" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE is primarily used to reduce high-dimensional data into 2D or 3D for visualization while preserving local structure." | |
| }, | |
| { | |
| "id": 3, | |
| "questionText": "t-SNE is particularly good at preserving which type of data structure?", | |
| "options": [ | |
| "Linear relationships", | |
| "Global distances", | |
| "Local neighborhood structure", | |
| "Class labels" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE focuses on preserving local similarities, meaning points that are close in high-dimensional space remain close in low-dimensional space." | |
| }, | |
| { | |
| "id": 4, | |
| "questionText": "What is the usual output dimension for t-SNE visualization?", | |
| "options": [ | |
| "1D", | |
| "10D", | |
| "2D or 3D", | |
| "5D" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE is typically used to reduce data to 2D or 3D for easy visualization." | |
| }, | |
| { | |
| "id": 5, | |
| "questionText": "Which probability distribution is used in the low-dimensional space of t-SNE?", | |
| "options": [ | |
| "Student’s t-distribution", | |
| "Gaussian distribution", | |
| "Binomial distribution", | |
| "Uniform distribution" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE uses a Student’s t-distribution with one degree of freedom in the low-dimensional space to model pairwise similarities and prevent crowding." | |
| }, | |
| { | |
| "id": 6, | |
| "questionText": "Which cost function does t-SNE minimize?", | |
| "options": [ | |
| "Cross-Entropy", | |
| "Euclidean distance", | |
| "Mean Squared Error", | |
| "Kullback-Leibler divergence" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE minimizes the Kullback-Leibler divergence between high-dimensional and low-dimensional probability distributions." | |
| }, | |
| { | |
| "id": 7, | |
| "questionText": "In t-SNE, what is 'perplexity'?", | |
| "options": [ | |
| "Effective number of neighbors considered", | |
| "Learning rate for gradient descent", | |
| "A measure of dataset size", | |
| "Number of output dimensions" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Perplexity controls how many neighbors influence the calculation of similarities; it acts like a smooth measure of the number of neighbors." | |
| }, | |
| { | |
| "id": 8, | |
| "questionText": "t-SNE is best suited for which type of relationships?", | |
| "options": [ | |
| "Only categorical", | |
| "Linear", | |
| "Only continuous labels", | |
| "Non-linear" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE captures non-linear relationships that linear methods like PCA may miss." | |
| }, | |
| { | |
| "id": 9, | |
| "questionText": "Which step is part of the t-SNE algorithm?", | |
| "options": [ | |
| "Clustering data into fixed bins", | |
| "Computing pairwise similarities in high-dimensional space", | |
| "Normalizing labels only", | |
| "Sorting features alphabetically" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE first computes pairwise similarities between all points in the high-dimensional space." | |
| }, | |
| { | |
| "id": 10, | |
| "questionText": "t-SNE initialization in low-dimensional space is usually:", | |
| "options": [ | |
| "Zero matrix", | |
| "Random", | |
| "Label-based ordering", | |
| "PCA projection" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE typically starts with random placement of points in the low-dimensional space." | |
| }, | |
| { | |
| "id": 11, | |
| "questionText": "Which of these is a limitation of t-SNE?", | |
| "options": [ | |
| "Requires categorical labels", | |
| "Does not work on numeric data", | |
| "Does not scale well to very large datasets", | |
| "Cannot handle linear relationships" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE can be computationally expensive and memory-intensive for large datasets." | |
| }, | |
| { | |
| "id": 12, | |
| "questionText": "Which t-SNE hyperparameter affects convergence speed?", | |
| "options": [ | |
| "Learning rate", | |
| "Perplexity", | |
| "KL divergence", | |
| "Number of features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The learning rate determines the step size in gradient descent optimization of the t-SNE cost function." | |
| }, | |
| { | |
| "id": 13, | |
| "questionText": "t-SNE is mainly used for:", | |
| "options": [ | |
| "Prediction", | |
| "Clustering as a main algorithm", | |
| "Classification", | |
| "Dimensionality reduction for visualization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE reduces dimensionality to visualize complex high-dimensional data effectively." | |
| }, | |
| { | |
| "id": 14, | |
| "questionText": "Which of these statements about t-SNE is correct?", | |
| "options": [ | |
| "It only works on 2D input", | |
| "It preserves global distances exactly", | |
| "It produces deterministic results", | |
| "It preserves local neighborhood structure" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE focuses on maintaining local structure; global distances may be distorted." | |
| }, | |
| { | |
| "id": 15, | |
| "questionText": "t-SNE reduces crowding in low-dimensional space using:", | |
| "options": [ | |
| "Euclidean distance in high dimension only", | |
| "Gaussian kernel in high dimension, Student’s t-distribution in low dimension", | |
| "Uniform distance mapping", | |
| "PCA initialization only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Using t-distribution in low dimension with heavy tails helps spread out points to avoid crowding." | |
| }, | |
| { | |
| "id": 16, | |
| "questionText": "t-SNE’s output can vary between runs due to:", | |
| "options": [ | |
| "Random initialization", | |
| "Gradient descent step size", | |
| "Data normalization", | |
| "Perplexity only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Random initialization in low-dimensional space can lead to different local minima in optimization." | |
| }, | |
| { | |
| "id": 17, | |
| "questionText": "Which of these is true about t-SNE and PCA?", | |
| "options": [ | |
| "t-SNE is deterministic like PCA", | |
| "Both capture only linear structures", | |
| "PCA is better for visualization", | |
| "t-SNE captures non-linear structure; PCA is linear" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE can capture complex non-linear relationships, whereas PCA preserves only linear variance." | |
| }, | |
| { | |
| "id": 18, | |
| "questionText": "t-SNE is not suitable for:", | |
| "options": [ | |
| "Non-linear data", | |
| "Small datasets", | |
| "Large-scale datasets without optimization", | |
| "2D visualization" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE has high computational and memory cost for very large datasets." | |
| }, | |
| { | |
| "id": 19, | |
| "questionText": "Which t-SNE hyperparameter influences the balance between local and global structure?", | |
| "options": [ | |
| "Learning rate", | |
| "Perplexity", | |
| "Output dimension", | |
| "KL divergence" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Perplexity acts as a smooth measure of the number of neighbors, balancing local vs. slightly broader structures." | |
| }, | |
| { | |
| "id": 20, | |
| "questionText": "t-SNE is stochastic because:", | |
| "options": [ | |
| "It uses KL divergence", | |
| "It uses random initialization and gradient descent", | |
| "It uses linear mapping", | |
| "It uses PCA first" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "The combination of random initialization and stochastic optimization leads to variability in results." | |
| }, | |
| { | |
| "id": 21, | |
| "questionText": "Which of these datasets is most appropriate for t-SNE?", | |
| "options": [ | |
| "Low-dimensional 2D data only", | |
| "Empty datasets", | |
| "Categorical data without encoding", | |
| "High-dimensional numeric data for visualization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE is designed to visualize high-dimensional data by projecting it to 2D or 3D." | |
| }, | |
| { | |
| "id": 22, | |
| "questionText": "t-SNE helps in which task indirectly?", | |
| "options": [ | |
| "Label encoding", | |
| "Understanding clusters or patterns", | |
| "Making predictions", | |
| "Model regularization" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "While t-SNE does not perform clustering, it can help visually identify clusters or patterns." | |
| }, | |
| { | |
| "id": 23, | |
| "questionText": "Why does t-SNE use Student’s t-distribution in low dimensions?", | |
| "options": [ | |
| "To handle the 'crowding problem' by allowing heavy tails", | |
| "To increase perplexity", | |
| "To linearize data", | |
| "To simplify computation" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Heavy-tailed t-distribution spreads out points in low-dimensional space, avoiding crowding." | |
| }, | |
| { | |
| "id": 24, | |
| "questionText": "t-SNE is sensitive to which of the following?", | |
| "options": [ | |
| "Number of labels only", | |
| "Dataset size irrelevant", | |
| "Hyperparameters (perplexity, learning rate) and initialization", | |
| "Output dimension only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Small changes in parameters or random initialization can significantly affect t-SNE results." | |
| }, | |
| { | |
| "id": 25, | |
| "questionText": "t-SNE preserves which type of distance?", | |
| "options": [ | |
| "Global Euclidean distance", | |
| "Local pairwise similarity", | |
| "Cosine distance", | |
| "Manhattan distance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE preserves pairwise similarities among neighbors rather than absolute global distances." | |
| }, | |
| { | |
| "id": 26, | |
| "questionText": "Which of these is a recommended practice before t-SNE?", | |
| "options": [ | |
| "Removing labels", | |
| "Standardizing or normalizing features", | |
| "Shuffling the dataset randomly", | |
| "Discretizing continuous features" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Feature scaling ensures no single feature dominates pairwise distance calculations." | |
| }, | |
| { | |
| "id": 27, | |
| "questionText": "t-SNE is mainly used in which field?", | |
| "options": [ | |
| "Optimization of hyperparameters", | |
| "Data visualization, exploratory data analysis", | |
| "Regression", | |
| "Prediction" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE helps visualize high-dimensional data in 2D or 3D for analysis and pattern detection." | |
| }, | |
| { | |
| "id": 28, | |
| "questionText": "t-SNE is different from PCA because:", | |
| "options": [ | |
| "It is non-linear and focuses on local similarities", | |
| "It reduces to a single principal component", | |
| "It preserves global linear variance", | |
| "It always gives deterministic results" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Unlike PCA, t-SNE focuses on preserving local structure and can capture complex non-linear relationships." | |
| }, | |
| { | |
| "id": 29, | |
| "questionText": "Which of these can be used to accelerate t-SNE on large datasets?", | |
| "options": [ | |
| "Reduce iterations to 1", | |
| "Increase perplexity to maximum", | |
| "Use raw data without scaling", | |
| "Barnes-Hut approximation or FIt-SNE" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Barnes-Hut t-SNE and FIt-SNE optimize computation for larger datasets." | |
| }, | |
| { | |
| "id": 30, | |
| "questionText": "t-SNE is primarily a ______ technique.", | |
| "options": [ | |
| "Clustering algorithm", | |
| "Regression", | |
| "Classification", | |
| "Visualization and dimensionality reduction" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE is mainly used to reduce dimensionality of data for visualization purposes." | |
| }, | |
| { | |
| "id": 31, | |
| "questionText": "t-SNE uses which similarity measure in high-dimensional space?", | |
| "options": [ | |
| "Conditional probability based on Gaussian distribution", | |
| "Hamming distance", | |
| "Manhattan distance", | |
| "Cosine similarity" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE converts pairwise distances into conditional probabilities using a Gaussian distribution to represent similarity in high-dimensional space." | |
| }, | |
| { | |
| "id": 32, | |
| "questionText": "Scenario: You increase t-SNE perplexity from 5 to 50. Likely effect?", | |
| "options": [ | |
| "KL divergence becomes zero", | |
| "Clusters appear tighter and more separated", | |
| "Clusters merge, representing broader neighborhood", | |
| "Visualization fails" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Higher perplexity considers more neighbors, leading to a broader view of local structure and sometimes merging of clusters." | |
| }, | |
| { | |
| "id": 33, | |
| "questionText": "t-SNE output varies between runs due to:", | |
| "options": [ | |
| "Variance scaling", | |
| "Perplexity normalization", | |
| "Gradient descent randomness and initialization", | |
| "Feature selection" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random initialization combined with stochastic gradient descent optimization can lead to different results in different runs." | |
| }, | |
| { | |
| "id": 34, | |
| "questionText": "Scenario: You apply t-SNE to 1000-dimensional word embeddings. Best practice?", | |
| "options": [ | |
| "Normalize only labels", | |
| "Discard half of the words randomly", | |
| "Optionally perform PCA first to reduce dimensions before t-SNE", | |
| "Apply t-SNE directly without scaling" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Using PCA first reduces noise and computation while retaining most variance, improving t-SNE performance on high-dimensional embeddings." | |
| }, | |
| { | |
| "id": 35, | |
| "questionText": "t-SNE is sensitive to which hyperparameters?", | |
| "options": [ | |
| "Perplexity, learning rate, number of iterations", | |
| "Data type", | |
| "Number of output labels only", | |
| "PCA components only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Perplexity, learning rate, and iterations significantly influence the optimization and visualization outcome." | |
| }, | |
| { | |
| "id": 36, | |
| "questionText": "Scenario: t-SNE shows distorted global distances. Reason?", | |
| "options": [ | |
| "t-SNE focuses on preserving local structure, not global distances", | |
| "Data not normalized", | |
| "Algorithm failed", | |
| "Number of components is wrong" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE prioritizes local similarity preservation; global distances may be distorted in low-dimensional visualization." | |
| }, | |
| { | |
| "id": 37, | |
| "questionText": "t-SNE uses which distribution in low-dimensional space to compute similarities?", | |
| "options": [ | |
| "Student’s t-distribution", | |
| "Poisson", | |
| "Uniform", | |
| "Gaussian" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "A heavy-tailed Student’s t-distribution is used to avoid crowding in low-dimensional embeddings." | |
| }, | |
| { | |
| "id": 38, | |
| "questionText": "Scenario: t-SNE applied to small dataset, clusters overlap in 2D. Possible reason?", | |
| "options": [ | |
| "Learning rate too small", | |
| "All of the above", | |
| "Data scaled incorrectly", | |
| "Perplexity too high" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Perplexity, learning rate, and feature scaling all affect t-SNE output; poor tuning can cause cluster overlap." | |
| }, | |
| { | |
| "id": 39, | |
| "questionText": "t-SNE reduces dimensionality mainly for:", | |
| "options": [ | |
| "Prediction accuracy", | |
| "Label generation", | |
| "Visualization of high-dimensional patterns", | |
| "Feature elimination" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE helps visualize complex high-dimensional data by reducing it to 2D or 3D while preserving local structure." | |
| }, | |
| { | |
| "id": 40, | |
| "questionText": "Scenario: t-SNE shows similar points far apart. Likely cause?", | |
| "options": [ | |
| "Random initialization", | |
| "All of the above", | |
| "Insufficient iterations", | |
| "Improper perplexity or learning rate" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "All these factors can distort local relationships in low-dimensional mapping." | |
| }, | |
| { | |
| "id": 41, | |
| "questionText": "Scenario: High-dimensional clusters not visible after t-SNE. Solution?", | |
| "options": [ | |
| "Use PCA for pre-reduction", | |
| "Tune perplexity and learning rate", | |
| "All of the above", | |
| "Increase iterations" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Proper hyperparameter tuning, PCA pre-reduction, and enough iterations improve cluster separation." | |
| }, | |
| { | |
| "id": 42, | |
| "questionText": "t-SNE optimization uses which method?", | |
| "options": [ | |
| "Gradient descent", | |
| "Random selection", | |
| "Eigen decomposition only", | |
| "Closed-form solution" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE minimizes KL divergence using iterative gradient descent." | |
| }, | |
| { | |
| "id": 43, | |
| "questionText": "Scenario: You use t-SNE on image embeddings, clusters appear inconsistent. Recommendation?", | |
| "options": [ | |
| "Reduce iterations", | |
| "Use raw pixels without embeddings", | |
| "Change output dimension to 1D", | |
| "Repeat multiple runs and average or use PCA initialization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Due to randomness, multiple runs or PCA initialization can stabilize t-SNE visualization." | |
| }, | |
| { | |
| "id": 44, | |
| "questionText": "Scenario: t-SNE applied after PCA with 50 components. Benefit?", | |
| "options": [ | |
| "Generates labels", | |
| "Prevents convergence", | |
| "Distorts local structure", | |
| "Reduces noise and computation" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "PCA pre-reduction helps t-SNE handle high-dimensional data efficiently while preserving structure." | |
| }, | |
| { | |
| "id": 45, | |
| "questionText": "t-SNE is mainly affected by:", | |
| "options": [ | |
| "Number of labels", | |
| "Hyperparameters and data scaling", | |
| "Dataset name", | |
| "Feature type only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE results are sensitive to perplexity, learning rate, iterations, and proper feature scaling." | |
| }, | |
| { | |
| "id": 46, | |
| "questionText": "Scenario: t-SNE clusters different classes but distorts distances. Interpretation?", | |
| "options": [ | |
| "Data incorrect", | |
| "Local structure preserved; global distances may differ", | |
| "Output dimension wrong", | |
| "Algorithm failed" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE emphasizes local neighbor relations, which can distort large-scale global distances." | |
| }, | |
| { | |
| "id": 47, | |
| "questionText": "t-SNE is not ideal for:", | |
| "options": [ | |
| "Exploring patterns", | |
| "Small datasets", | |
| "Extremely large datasets without optimization", | |
| "Visualizing embeddings" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "t-SNE has high computational cost for very large datasets, though optimized versions exist." | |
| }, | |
| { | |
| "id": 48, | |
| "questionText": "Scenario: Learning rate too high in t-SNE. Effect?", | |
| "options": [ | |
| "All of the above", | |
| "Optimization diverges, poor visualization", | |
| "Slower convergence", | |
| "Better cluster separation" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Excessively high learning rate can prevent gradient descent from converging, causing chaotic mapping." | |
| }, | |
| { | |
| "id": 49, | |
| "questionText": "Scenario: Low perplexity used on dense dataset. Effect?", | |
| "options": [ | |
| "Improves convergence", | |
| "All points overlap", | |
| "Merges clusters", | |
| "Overemphasizes very local structure, clusters may fragment" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Low perplexity focuses on few neighbors, possibly fragmenting clusters that are globally coherent." | |
| }, | |
| { | |
| "id": 50, | |
| "questionText": "Scenario: t-SNE applied to gene expression data for visualization. Useful because?", | |
| "options": [ | |
| "Generates labels", | |
| "Predicts outcomes", | |
| "Reduces features for training", | |
| "Highlights local patterns and clusters of similar samples" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "t-SNE reveals underlying patterns in high-dimensional gene expression data." | |
| }, | |
| { | |
| "id": 51, | |
| "questionText": "Scenario: After applying t-SNE, some clusters appear elongated. Likely cause?", | |
| "options": [ | |
| "Perplexity or learning rate not optimal", | |
| "Insufficient iterations", | |
| "Random initialization", | |
| "All of the above" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Cluster shape distortions can result from improper hyperparameters, initialization, or insufficient optimization steps." | |
| }, | |
| { | |
| "id": 52, | |
| "questionText": "Scenario: t-SNE shows overlapping clusters for distinct classes. Recommended action?", | |
| "options": [ | |
| "Reduce dataset size", | |
| "Increase output dimensions beyond 3", | |
| "Use raw data without scaling", | |
| "Adjust perplexity or learning rate, or try PCA initialization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Hyperparameter tuning and proper initialization help better separate clusters in low-dimensional mapping." | |
| }, | |
| { | |
| "id": 53, | |
| "questionText": "t-SNE can be combined with PCA to:", | |
| "options": [ | |
| "Replace t-SNE entirely", | |
| "Increase perplexity automatically", | |
| "Reduce noise and dimensionality before t-SNE", | |
| "Generate class labels" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Using PCA first reduces high-dimensional noise, improving t-SNE efficiency and visualization quality." | |
| }, | |
| { | |
| "id": 54, | |
| "questionText": "Scenario: t-SNE on image embeddings produces different plots on repeated runs. Reason?", | |
| "options": [ | |
| "Data scaling issues", | |
| "Random initialization and stochastic gradient descent", | |
| "Perplexity too low", | |
| "Output dimension is too small" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Variability is due to random initialization and stochastic optimization inherent to t-SNE." | |
| }, | |
| { | |
| "id": 55, | |
| "questionText": "Scenario: t-SNE applied to word embeddings shows tight clusters merging. Likely reason?", | |
| "options": [ | |
| "KL divergence minimized", | |
| "Learning rate too low", | |
| "Perplexity too high, considering more neighbors", | |
| "Insufficient iterations" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "High perplexity broadens the neighborhood, causing close clusters to merge visually." | |
| }, | |
| { | |
| "id": 56, | |
| "questionText": "Scenario: Large dataset t-SNE visualization is slow. Solution?", | |
| "options": [ | |
| "Reduce perplexity to 1", | |
| "Increase learning rate to max", | |
| "Use Barnes-Hut t-SNE or FIt-SNE approximation", | |
| "Use raw data without normalization" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Optimized t-SNE versions like Barnes-Hut or FIt-SNE reduce computation and memory cost for large datasets." | |
| }, | |
| { | |
| "id": 57, | |
| "questionText": "Scenario: t-SNE on 100-dimensional embeddings, output 2D. Why might global distances be inaccurate?", | |
| "options": [ | |
| "Incorrect PCA initialization", | |
| "t-SNE prioritizes local neighborhood preservation over global distances", | |
| "Random features selected", | |
| "Algorithm failed" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "t-SNE focuses on preserving local similarities; global distances may be distorted in low-dimensional space." | |
| }, | |
| { | |
| "id": 58, | |
| "questionText": "Scenario: t-SNE applied on noisy dataset. Recommended preprocessing?", | |
| "options": [ | |
| "Normalize or standardize features, optionally reduce noise with PCA", | |
| "Reduce output dimension to 1D", | |
| "Leave data raw", | |
| "Increase perplexity to max" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Scaling and dimensionality reduction improve t-SNE’s ability to capture meaningful structure." | |
| }, | |
| { | |
| "id": 59, | |
| "questionText": "Scenario: Two similar clusters in high-dimensional space are far apart in t-SNE plot. Likely reason?", | |
| "options": [ | |
| "Data labeling issues", | |
| "Algorithm failure", | |
| "Insufficient perplexity or learning rate tuning", | |
| "Incorrect output dimension" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Improper hyperparameters can distort low-dimensional mapping even if local structure is partially preserved." | |
| }, | |
| { | |
| "id": 60, | |
| "questionText": "t-SNE can indirectly help in which of these tasks?", | |
| "options": [ | |
| "Direct prediction", | |
| "Visual identification of clusters or patterns", | |
| "Label encoding", | |
| "Feature selection for regression" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "While not a clustering method itself, t-SNE helps visually identify clusters or patterns in high-dimensional data." | |
| }, | |
| { | |
| "id": 61, | |
| "questionText": "Scenario: You want to visualize 10,000 points with t-SNE but it is slow. Best practice?", | |
| "options": [ | |
| "Increase perplexity to 1000", | |
| "Use optimized versions like FIt-SNE or reduce dimensionality with PCA first", | |
| "Randomly remove half the points", | |
| "Reduce iterations to 10" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Optimized algorithms or PCA pre-reduction improve t-SNE performance on large datasets." | |
| }, | |
| { | |
| "id": 62, | |
| "questionText": "Scenario: t-SNE applied on text embeddings shows random patterns. Likely cause?", | |
| "options": [ | |
| "Low-dimensional output", | |
| "Dataset too large", | |
| "Random initialization and inappropriate hyperparameters", | |
| "Data normalization applied" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random initialization combined with suboptimal perplexity or learning rate can produce unstable visualizations." | |
| }, | |
| { | |
| "id": 63, | |
| "questionText": "t-SNE is particularly useful when:", | |
| "options": [ | |
| "High-dimensional data visualization is needed", | |
| "Regression is required", | |
| "Prediction is the goal", | |
| "Clustering as a main task" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE is designed for visualization of complex, high-dimensional datasets." | |
| }, | |
| { | |
| "id": 64, | |
| "questionText": "Scenario: t-SNE clusters appear overlapping even after PCA pre-reduction. Recommendation?", | |
| "options": [ | |
| "Reduce dataset size further", | |
| "Decrease output dimension to 1D", | |
| "Tune perplexity and learning rate, or increase iterations", | |
| "Switch to raw data" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Hyperparameter tuning is key to achieving better separation in t-SNE visualizations." | |
| }, | |
| { | |
| "id": 65, | |
| "questionText": "Scenario: t-SNE visualization is chaotic. Possible reasons?", | |
| "options": [ | |
| "High learning rate, low perplexity, random initialization", | |
| "PCA used for pre-reduction", | |
| "Data normalization applied", | |
| "Output dimension too large" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Improper hyperparameters and random initialization can produce poor or chaotic t-SNE plots." | |
| }, | |
| { | |
| "id": 66, | |
| "questionText": "Scenario: You reduce embeddings to 2D with t-SNE, but clusters not apparent. Next step?", | |
| "options": [ | |
| "Increase dataset size", | |
| "Change output to 1D", | |
| "Use raw data only", | |
| "Adjust perplexity, learning rate, or perform PCA first" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Hyperparameter tuning and preprocessing like PCA can help reveal clusters in t-SNE plots." | |
| }, | |
| { | |
| "id": 67, | |
| "questionText": "Scenario: t-SNE applied to 300-dimensional image embeddings, some clusters scattered. Likely reason?", | |
| "options": [ | |
| "All of the above", | |
| "Learning rate too low", | |
| "High-dimensional noise, consider PCA pre-reduction", | |
| "Perplexity too high" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Noise and improper hyperparameters can scatter clusters; preprocessing and tuning are essential." | |
| }, | |
| { | |
| "id": 68, | |
| "questionText": "t-SNE preserves local distances by converting pairwise distances to:", | |
| "options": [ | |
| "Probabilities using Gaussian in high-d and t-distribution in low-d", | |
| "Manhattan distance only", | |
| "Euclidean distances only", | |
| "Cosine similarity only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Pairwise distances are converted to conditional probabilities in high-d, and Student’s t-distribution in low-d preserves local similarity." | |
| }, | |
| { | |
| "id": 69, | |
| "questionText": "Scenario: You run t-SNE multiple times and get slightly different plots. How to improve consistency?", | |
| "options": [ | |
| "Increase output dimension to 5D", | |
| "Decrease dataset size", | |
| "Normalize labels only", | |
| "Use PCA initialization and fix random seed" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "PCA initialization and fixing random seed reduce variability in t-SNE visualization." | |
| }, | |
| { | |
| "id": 70, | |
| "questionText": "Scenario: t-SNE produces compressed clusters in center. Likely cause?", | |
| "options": [ | |
| "Crowding problem in low-dimensional space", | |
| "Algorithm failure", | |
| "Learning rate too low", | |
| "Perplexity too high" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "The crowding problem arises because high-dimensional neighborhoods cannot be perfectly represented in low-dimensional space, causing compression." | |
| }, | |
| { | |
| "id": 71, | |
| "questionText": "Scenario: You apply t-SNE on 10,000 image embeddings and clusters appear noisy. Which is the best approach?", | |
| "options": [ | |
| "Use PCA to reduce dimensions before t-SNE and tune perplexity", | |
| "Increase learning rate to maximum", | |
| "Use raw pixel values directly", | |
| "Reduce output dimensions to 1D" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "PCA pre-reduction reduces noise and dimensionality, improving t-SNE visualization on large datasets." | |
| }, | |
| { | |
| "id": 72, | |
| "questionText": "Scenario: t-SNE on text embeddings shows overlapping topics. Likely cause?", | |
| "options": [ | |
| "Perplexity too low or high, or insufficient iterations", | |
| "Data normalization applied", | |
| "Output dimension too high", | |
| "Embedding size too small" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Hyperparameter tuning is essential; low/high perplexity or insufficient iterations can cause overlapping clusters." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "Scenario: Two clusters in high-dimensional space appear merged in t-SNE plot. What can you do?", | |
| "options": [ | |
| "Adjust perplexity, learning rate, or use PCA initialization", | |
| "Increase output dimension to 5D", | |
| "Normalize labels", | |
| "Reduce dataset size randomly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Proper hyperparameter tuning and PCA initialization can help separate clusters that appear merged in low-dimensional mapping." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Scenario: Running t-SNE on genomic data, you notice small clusters isolated. Reason?", | |
| "options": [ | |
| "Perplexity may be low, emphasizing very local neighborhoods", | |
| "High learning rate", | |
| "Output dimension too high", | |
| "Data normalization missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Low perplexity focuses on very local neighborhoods, potentially isolating small clusters in visualization." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "Scenario: t-SNE produces different visualizations on repeated runs. How to stabilize?", | |
| "options": [ | |
| "Use PCA initialization and fix random seed", | |
| "Reduce dataset size", | |
| "Increase output dimension beyond 3D", | |
| "Use raw data without scaling" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "PCA initialization and setting a fixed random seed reduce stochastic variation in t-SNE results." | |
| }, | |
| { | |
| "id": 76, | |
| "questionText": "Scenario: t-SNE on high-dimensional sensor data shows tight clusters but global distances are distorted. Interpretation?", | |
| "options": [ | |
| "Local structure preserved; global distances are not maintained", | |
| "Algorithm failed", | |
| "Data incorrectly scaled", | |
| "Output dimension wrong" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE preserves local pairwise relationships; global distances can appear distorted in 2D/3D visualization." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "Scenario: Clusters appear fragmented after t-SNE on customer embeddings. Likely reason?", | |
| "options": [ | |
| "Perplexity too low", | |
| "Learning rate too high", | |
| "Data normalization missing", | |
| "All of the above" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Low perplexity, high learning rate, or improper feature scaling can fragment clusters in t-SNE plots." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Scenario: Applying t-SNE to visualize embeddings after deep learning model training. Best preprocessing?", | |
| "options": [ | |
| "Normalize features and optionally use PCA to reduce dimensions", | |
| "Use raw embeddings directly", | |
| "Randomly shuffle dimensions", | |
| "Use first two features only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Normalization and PCA pre-reduction enhance t-SNE visualization quality for deep embeddings." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "Scenario: Clusters appear compressed in center of t-SNE plot. Likely cause?", | |
| "options": [ | |
| "Crowding problem inherent to low-dimensional mapping", | |
| "Algorithm failed", | |
| "Perplexity too high", | |
| "Learning rate too low" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Crowding problem occurs because high-dimensional neighborhoods cannot be perfectly represented in low dimensions, causing compression." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "Scenario: After t-SNE, similar data points are far apart in 2D. Likely reason?", | |
| "options": [ | |
| "Hyperparameters not tuned correctly", | |
| "Data normalization failed", | |
| "Output dimension too high", | |
| "Labels missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Incorrect perplexity, learning rate, or insufficient iterations can cause similar points to appear far apart." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "Scenario: t-SNE on embeddings shows elongated clusters. Best action?", | |
| "options": [ | |
| "Adjust perplexity and learning rate, or increase iterations", | |
| "Reduce output dimension to 1D", | |
| "Use raw embeddings without scaling", | |
| "Remove random points" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Cluster elongation often occurs due to suboptimal hyperparameters; tuning and more iterations can improve results." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "Scenario: Visualizing 50,000 text embeddings with t-SNE is very slow. Solution?", | |
| "options": [ | |
| "Use FIt-SNE or Barnes-Hut t-SNE for faster computation", | |
| "Reduce perplexity to 1", | |
| "Use raw text instead of embeddings", | |
| "Decrease output dimensions to 1D" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Optimized t-SNE implementations significantly speed up visualization of large datasets." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "Scenario: t-SNE clusters overlap despite tuning. Next step?", | |
| "options": [ | |
| "Consider alternative dimensionality reduction methods like UMAP", | |
| "Reduce output dimensions further", | |
| "Use raw data without embeddings", | |
| "Remove labels" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "If t-SNE cannot separate clusters even after tuning, UMAP or other DR methods might better preserve structure." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "Scenario: t-SNE on protein expression data shows some scattered clusters. Likely cause?", | |
| "options": [ | |
| "Noise in high-dimensional data", | |
| "Insufficient iterations", | |
| "Suboptimal hyperparameters", | |
| "All of the above" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Noise and suboptimal hyperparameters can cause scattered clusters; preprocessing and tuning help visualization." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "Scenario: You apply t-SNE after PCA to reduce 500D embeddings to 50D. Why?", | |
| "options": [ | |
| "Reduce computation and noise while retaining important variance", | |
| "Increase global distance preservation", | |
| "Generate labels automatically", | |
| "Visualize in 3D directly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "PCA pre-reduction helps t-SNE efficiently process high-dimensional data while keeping meaningful structure." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "Scenario: Clusters appear stretched along a single axis. Likely cause?", | |
| "options": [ | |
| "Perplexity too high or learning rate too low", | |
| "Algorithm failure", | |
| "Data normalization missing", | |
| "Incorrect output dimension" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Improper hyperparameters can cause cluster elongation in low-dimensional embeddings." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "Scenario: t-SNE applied to 300-dimensional embeddings of customer behavior. Output 2D. What can distort clusters?", | |
| "options": [ | |
| "Random initialization, hyperparameters, noisy features", | |
| "Data scaling applied", | |
| "Output dimension too high", | |
| "Label missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Cluster distortion occurs due to noise, initialization randomness, and hyperparameter settings." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Scenario: t-SNE output differs between runs. Best practice to make consistent?", | |
| "options": [ | |
| "Fix random seed and use PCA initialization", | |
| "Use raw data directly", | |
| "Reduce output dimension to 1D", | |
| "Change KL divergence formula" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Fixed random seed and PCA initialization reduce stochastic variation across runs." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "Scenario: t-SNE applied to embeddings shows overlapping clusters, perplexity set to 5. Recommendation?", | |
| "options": [ | |
| "Increase perplexity to consider more neighbors", | |
| "Reduce learning rate", | |
| "Decrease output dimension to 1D", | |
| "Use raw high-dimensional features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Low perplexity can underrepresent neighborhood structure; increasing perplexity may separate clusters better." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "Scenario: After t-SNE, similar embeddings appear scattered. Likely hyperparameter issue?", | |
| "options": [ | |
| "Learning rate too high or perplexity not optimal", | |
| "Output dimension too high", | |
| "Labels missing", | |
| "Data normalized incorrectly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Improper learning rate or perplexity can scatter similar points, reducing visualization quality." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "Scenario: t-SNE shows different cluster sizes for similar data. Why?", | |
| "options": [ | |
| "Local density differences and crowding problem in low-dimensional space", | |
| "Algorithm failure", | |
| "Incorrect output dimension", | |
| "Data normalization missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "t-SNE preserves local structure; dense regions appear larger and sparse regions smaller due to crowding problem." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Scenario: You want faster t-SNE on 100,000 points. Recommendation?", | |
| "options": [ | |
| "Use Barnes-Hut or FIt-SNE approximation", | |
| "Reduce output dimension to 1D", | |
| "Use raw data without scaling", | |
| "Decrease perplexity to 1" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Optimized t-SNE versions reduce computation and memory for large datasets." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "Scenario: t-SNE shows elongated clusters. Likely hyperparameter adjustment?", | |
| "options": [ | |
| "Adjust perplexity and learning rate, or increase iterations", | |
| "Reduce dataset size", | |
| "Use raw data", | |
| "Remove features randomly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Cluster elongation often occurs due to improper hyperparameters; tuning can improve visualization." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Scenario: t-SNE on embedding shows isolated points far from clusters. Likely reason?", | |
| "options": [ | |
| "Outliers or low perplexity emphasizing local neighborhoods", | |
| "Algorithm failure", | |
| "Output dimension too high", | |
| "Data normalization missing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Outliers or very low perplexity can cause points to appear isolated in visualization." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Scenario: You want t-SNE results reproducible across runs. Steps?", | |
| "options": [ | |
| "Fix random seed, use PCA initialization, standardize features", | |
| "Increase output dimensions", | |
| "Reduce dataset size", | |
| "Use raw data" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reproducibility requires controlling randomness and preprocessing consistently." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Scenario: t-SNE applied to multi-class embeddings, some classes overlap. Best solution?", | |
| "options": [ | |
| "Tune perplexity, learning rate, or try PCA initialization", | |
| "Reduce number of classes", | |
| "Change output to 1D", | |
| "Use raw features without preprocessing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Hyperparameter tuning and PCA initialization often improve cluster separation for multi-class data." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "Scenario: t-SNE visualization shows tight clusters compressed together. Likely cause?", | |
| "options": [ | |
| "Crowding problem and insufficient perplexity", | |
| "Algorithm failure", | |
| "Output dimension too high", | |
| "Data not normalized" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Crowding problem causes clusters to compress in low-dimensional space, especially with suboptimal perplexity." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "Scenario: You apply t-SNE on embeddings with high noise. Recommended step?", | |
| "options": [ | |
| "Denoise or reduce dimensionality with PCA before t-SNE", | |
| "Use raw embeddings", | |
| "Reduce output dimension to 1D", | |
| "Increase iterations without preprocessing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Preprocessing helps t-SNE focus on meaningful structure rather than noise." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "Scenario: t-SNE visualization shows variable cluster shapes between runs. Solution?", | |
| "options": [ | |
| "Fix random seed and use PCA initialization", | |
| "Reduce dataset size", | |
| "Change output dimension to 1D", | |
| "Normalize labels" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Fixing seed and PCA initialization stabilizes t-SNE output across runs." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "Scenario: t-SNE applied to 500D embeddings, some clusters overlapping. Recommended approach?", | |
| "options": [ | |
| "Try PCA pre-reduction, adjust perplexity and learning rate, increase iterations", | |
| "Reduce output dimension to 1D", | |
| "Use raw features", | |
| "Remove labels" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Proper preprocessing and hyperparameter tuning help t-SNE separate overlapping clusters." | |
| } | |
| ] | |
| } | |