Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Commit 
							
							·
						
						4732065
	
1
								Parent(s):
							
							80d202c
								
Add creak manipulation
Browse filesThis view is limited to 50 files because it contains too many changes.  
							See raw diff
- .gitattributes +2 -0
- Dataset/Audio_files/1034_121119_000028_000001.wav +3 -0
- Dataset/Audio_files/1088_129236_000006_000007.wav +3 -0
- Dataset/Audio_files/1422_149735_000006_000000.wav +3 -0
- Dataset/Audio_files/14_212_000019_000000.wav +3 -0
- Dataset/Audio_files/1535_141644_000004_000001.wav +3 -0
- Dataset/Audio_files/1731_142320_000122_000005.wav +3 -0
- Dataset/Audio_files/3009_10327_000027_000005.wav +3 -0
- Dataset/Audio_files/329_861_000024_000003.wav +3 -0
- Dataset/Audio_files/4830_25904_000008_000001.wav +3 -0
- Dataset/Audio_files/4957_30119_000070_000001.wav +3 -0
- Dataset/Audio_files/5012_80192_000020_000003.wav +3 -0
- Dataset/Audio_files/5802_76044_000038_000000.wav +3 -0
- Dataset/Audio_files/6544_71420_000024_000001.wav +3 -0
- Dataset/Audio_files/6918_47541_000006_000008.wav +3 -0
- Dataset/Audio_files/7011_66622_000032_000002.wav +3 -0
- Dataset/Audio_files/7059_77897_000017_000001.wav +3 -0
- Dataset/Audio_files/7190_90542_000054_000000.wav +3 -0
- Dataset/Audio_files/7226_86965_000020_000001.wav +3 -0
- Dataset/Audio_files/7245_104888_000016_000000.wav +3 -0
- Dataset/Audio_files/83_9960_000017_000003.wav +3 -0
- Dataset/Audio_files/8758_296465_000020_000000.wav +3 -0
- Dataset/Audio_files/8820_294120_000011_000001.wav +3 -0
- Dataset/Embeddings/1034/1034_121119_000028_000001.pth +3 -0
- Dataset/Embeddings/1088/1088_129236_000006_000007.pth +3 -0
- Dataset/Embeddings/14/14_212_000019_000000.pth +3 -0
- Dataset/Embeddings/1422/1422_149735_000006_000000.pth +3 -0
- Dataset/Embeddings/1535/1535_141644_000004_000001.pth +3 -0
- Dataset/Embeddings/1731/1731_142320_000122_000005.pth +3 -0
- Dataset/Embeddings/3009/3009_10327_000027_000005.pth +3 -0
- Dataset/Embeddings/329/329_861_000024_000003.pth +3 -0
- Dataset/Embeddings/4830/4830_25904_000008_000001.pth +3 -0
- Dataset/Embeddings/4957/4957_30119_000070_000001.pth +3 -0
- Dataset/Embeddings/5012/5012_80192_000020_000003.pth +3 -0
- Dataset/Embeddings/5802/5802_76044_000038_000000.pth +3 -0
- Dataset/Embeddings/6544/6544_71420_000024_000001.pth +3 -0
- Dataset/Embeddings/6918/6918_47541_000006_000008.pth +3 -0
- Dataset/Embeddings/7011/7011_66622_000032_000002.pth +3 -0
- Dataset/Embeddings/7059/7059_77897_000017_000001.pth +3 -0
- Dataset/Embeddings/7190/7190_90542_000054_000000.pth +3 -0
- Dataset/Embeddings/7226/7226_86965_000020_000001.pth +3 -0
- Dataset/Embeddings/7245/7245_104888_000016_000000.pth +3 -0
- Dataset/Embeddings/83/83_9960_000017_000003.pth +3 -0
- Dataset/Embeddings/8758/8758_296465_000020_000000.pth +3 -0
- Dataset/Embeddings/8820/8820_294120_000011_000001.pth +3 -0
- Dataset/Embeddings/mean.json +258 -0
- Dataset/Embeddings/std.json +258 -0
- Dataset/dataset.yaml +67 -0
- app.py +123 -98
- models/norm_flow/config.json +14 -11
    	
        .gitattributes
    CHANGED
    
    | @@ -44,3 +44,5 @@ models/pvq_extractor/Resonance.onnx filter=lfs diff=lfs merge=lfs -text | |
| 44 | 
             
            models/pvq_extractor/Weight.onnx filter=lfs diff=lfs merge=lfs -text
         | 
| 45 | 
             
            models/norm_flow/model.pt filter=lfs diff=lfs merge=lfs -text
         | 
| 46 | 
             
            audio/1034_121119_000028_000001.wav filter=lfs diff=lfs merge=lfs -text
         | 
|  | |
|  | 
|  | |
| 44 | 
             
            models/pvq_extractor/Weight.onnx filter=lfs diff=lfs merge=lfs -text
         | 
| 45 | 
             
            models/norm_flow/model.pt filter=lfs diff=lfs merge=lfs -text
         | 
| 46 | 
             
            audio/1034_121119_000028_000001.wav filter=lfs diff=lfs merge=lfs -text
         | 
| 47 | 
            +
            Dataset/Audio_files/*.wav filter=lfs diff=lfs merge=lfs -text
         | 
| 48 | 
            +
            Dataset/Embeddings/**/*.pth filter=lfs diff=lfs merge=lfs -text
         | 
    	
        Dataset/Audio_files/1034_121119_000028_000001.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:dc00c4e893ccf708cae4366e36ede93b4e158f516323a0724fc6e9f956c76aff
         | 
| 3 | 
            +
            size 385964
         | 
    	
        Dataset/Audio_files/1088_129236_000006_000007.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:27b2e7191ba1cfad41bc1ab1bd09ec1af87062e48abbab1ef01809c76ed738da
         | 
| 3 | 
            +
            size 311084
         | 
    	
        Dataset/Audio_files/1422_149735_000006_000000.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f59d7f7a3c7364d7ac254bd94d3384e9b8e173634eb8b7492ec751d8584f8bb5
         | 
| 3 | 
            +
            size 345644
         | 
    	
        Dataset/Audio_files/14_212_000019_000000.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:1271f49cf4855d1b7d9b87e99a0c79e5505acbfba94cd8f594c1df2a29d96027
         | 
| 3 | 
            +
            size 633652
         | 
    	
        Dataset/Audio_files/1535_141644_000004_000001.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:4cc6f0bc3b9ebecbc1dab5a430c37140337a6bbeaf6f75103d74b2b4e75b4f06
         | 
| 3 | 
            +
            size 295724
         | 
    	
        Dataset/Audio_files/1731_142320_000122_000005.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:b3d3e88bc53ee1cad73100ea4ad6ccc6d9bcbc36145962d400122b658e27b7e8
         | 
| 3 | 
            +
            size 316844
         | 
    	
        Dataset/Audio_files/3009_10327_000027_000005.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:bbe46722ef2d331a5bc1c552cd6ad3c8a69022a3c70b1c03b609856dc073ca32
         | 
| 3 | 
            +
            size 309164
         | 
    	
        Dataset/Audio_files/329_861_000024_000003.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:7dab66421315b9f22bbbaf909e69184c01eaba29e536c2b449c8a7310f2edce7
         | 
| 3 | 
            +
            size 261164
         | 
    	
        Dataset/Audio_files/4830_25904_000008_000001.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:cd41aaf86c9d6e394d9afcca5e3128aa6a52fd2948e3bcf6aa03e5c18f2c7eec
         | 
| 3 | 
            +
            size 483884
         | 
    	
        Dataset/Audio_files/4957_30119_000070_000001.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f483b0a7003610ba8451db035f3347b156bb348c7aa356b7403f8ca86b98ab28
         | 
| 3 | 
            +
            size 503084
         | 
    	
        Dataset/Audio_files/5012_80192_000020_000003.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:91aa9243a6697d65e6f5464b40e9b420b5e5cdef83b64a5556baef1ac548f11e
         | 
| 3 | 
            +
            size 409004
         | 
    	
        Dataset/Audio_files/5802_76044_000038_000000.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:22964325ee8f751dddd136b3219191443270529d95ee27b45c4a789501286492
         | 
| 3 | 
            +
            size 460844
         | 
    	
        Dataset/Audio_files/6544_71420_000024_000001.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:1289b6714acb263b8bb36d6acfbb4efded0a5c67cc9b6a6246340dd3493c6c2b
         | 
| 3 | 
            +
            size 209324
         | 
    	
        Dataset/Audio_files/6918_47541_000006_000008.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:8dcd7955f5469755038482a58f0929012526f98130513acd9d0cd1bc208bbfe8
         | 
| 3 | 
            +
            size 898612
         | 
    	
        Dataset/Audio_files/7011_66622_000032_000002.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:111c88a591efa42a608d1609214e6ef56a64f3bd79a88b57efecba2ca2f7ed4c
         | 
| 3 | 
            +
            size 309164
         | 
    	
        Dataset/Audio_files/7059_77897_000017_000001.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:17f51a3d2133e81607e36403b6bcb8bd7ec9e03c1bcfbbc80b4123c1b31d6618
         | 
| 3 | 
            +
            size 243884
         | 
    	
        Dataset/Audio_files/7190_90542_000054_000000.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:34bdcfbdf51f3475465804b9dbf27f8e647ccc1af17573b0a923f44881217093
         | 
| 3 | 
            +
            size 222764
         | 
    	
        Dataset/Audio_files/7226_86965_000020_000001.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:b824a36ceaeec45724088957b1e543ee3b477ca1ee55e4c55e96ac8c2b018fb5
         | 
| 3 | 
            +
            size 622132
         | 
    	
        Dataset/Audio_files/7245_104888_000016_000000.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:92d74fe5965fff3182cb1f273c80ea051033c7aa0dbbb44ae48ccded15210216
         | 
| 3 | 
            +
            size 341804
         | 
    	
        Dataset/Audio_files/83_9960_000017_000003.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:0a1cdc21d779c1d108af86ec6a93558a501322a67c221c25e2dd32d93e0c356a
         | 
| 3 | 
            +
            size 192044
         | 
    	
        Dataset/Audio_files/8758_296465_000020_000000.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:8adafe1ab7b3e86c82454c06863dd616c5b52f91ebb8690fcc64ec7abb2821dc
         | 
| 3 | 
            +
            size 520364
         | 
    	
        Dataset/Audio_files/8820_294120_000011_000001.wav
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:b78d6b2aed1a99e8f3750bd54c50e8ed2e08dba114792fa604101faf27894708
         | 
| 3 | 
            +
            size 213164
         | 
    	
        Dataset/Embeddings/1034/1034_121119_000028_000001.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f4a7bdd020bf0da6fb08d272448c8b61c6f065e529084ce1cf9c39c1636e017c
         | 
| 3 | 
            +
            size 2358
         | 
    	
        Dataset/Embeddings/1088/1088_129236_000006_000007.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:1544023ea8afb9b0c71fa31e1e16d2ec510cf9d8637a64648941448c9e5e18ae
         | 
| 3 | 
            +
            size 2358
         | 
    	
        Dataset/Embeddings/14/14_212_000019_000000.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:3f248135ffdacc81ef4b5071f564448d49c2341b5c5c14bf4257af633f9318fd
         | 
| 3 | 
            +
            size 2269
         | 
    	
        Dataset/Embeddings/1422/1422_149735_000006_000000.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:fee0b79857cf8ce499a658dfeb5137d5b4fa7e849dd8118c32028391b88b3d08
         | 
| 3 | 
            +
            size 2358
         | 
    	
        Dataset/Embeddings/1535/1535_141644_000004_000001.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:7d71a10862f81b5a3f0877f6eb26cd4bc733e0cc9868acc65a65bb23ffe304b9
         | 
| 3 | 
            +
            size 2358
         | 
    	
        Dataset/Embeddings/1731/1731_142320_000122_000005.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:79f2de5cb40487fe19b5099b57fe0a41f0436f554019c619bcb4cd9d6c64bf36
         | 
| 3 | 
            +
            size 2358
         | 
    	
        Dataset/Embeddings/3009/3009_10327_000027_000005.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f24187ad06ecbe02df165538c6881192cfd055b5a3cc5ab1348d2c05d6567421
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/329/329_861_000024_000003.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:fc001f48b3f67d25192967e0a297dc1787144e36222e5b83a71ae6f5b89be9b3
         | 
| 3 | 
            +
            size 2274
         | 
    	
        Dataset/Embeddings/4830/4830_25904_000008_000001.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:170bdd680d0735a19c5e88e01fc8bf84dac623d7c73eebeff6e99974b8e9d081
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/4957/4957_30119_000070_000001.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:c922316a446bcc28db8a43f768ade2b2113ce0f6fab24b60b396f67264ce07c8
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/5012/5012_80192_000020_000003.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:ba1c17f5100b1e0147e9c96d864cc054e8840a15cd46307e191fbe88a728b1b0
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/5802/5802_76044_000038_000000.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:7656515f537fa6de193f40d78c9747cfb1268266d3dd88a22a41ce2c3a28514a
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/6544/6544_71420_000024_000001.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f70c9bd92dea6ddfd495c7ab32cae30494eaf3b42f6d6533ff9f55de80593f05
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/6918/6918_47541_000006_000008.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:ed49a76c226606f98ce4c2db2aac937354e40cc8fb789e29e93aa87f64bc01d1
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/7011/7011_66622_000032_000002.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:43e63641af7d4322b89489acb9c10cfc7e71961bd6479c55c17135b3ecfa5605
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/7059/7059_77897_000017_000001.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:688e50692694cccbe5f61c8780e0980509118f4061a44180ec8dffff2d963921
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/7190/7190_90542_000054_000000.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:9f1925fcb8ce5ffa8b9223de17ea8d98c0abb24409852208f03c607374c9f60a
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/7226/7226_86965_000020_000001.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:3f88a82eee39139ab65f3b201f2657b795ad66d70ccd637f903d537df2acaca0
         | 
| 3 | 
            +
            size 2353
         | 
    	
        Dataset/Embeddings/7245/7245_104888_000016_000000.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:db50270eb45aab4344720a1da44d3c9d91ace10e69514287b3174ba9c2ca208a
         | 
| 3 | 
            +
            size 2358
         | 
    	
        Dataset/Embeddings/83/83_9960_000017_000003.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:958832b7e4e77f6eb8343b91091c8603b683b25c03f242e6de4b09952a0fba6d
         | 
| 3 | 
            +
            size 2274
         | 
    	
        Dataset/Embeddings/8758/8758_296465_000020_000000.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:465be53ae1d0a44ccdb90e0fcaccf09a0ae91041f984ef18f606df0169ea8f3e
         | 
| 3 | 
            +
            size 2358
         | 
    	
        Dataset/Embeddings/8820/8820_294120_000011_000001.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:b79ee0d4796df0776bc0ddfc8683f2a025c4829893f28b3cff6b4a2d5405d968
         | 
| 3 | 
            +
            size 2358
         | 
    	
        Dataset/Embeddings/mean.json
    ADDED
    
    | @@ -0,0 +1,258 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [
         | 
| 2 | 
            +
              0.21412190794944763,
         | 
| 3 | 
            +
              0.18206638097763062,
         | 
| 4 | 
            +
              0.11840786784887314,
         | 
| 5 | 
            +
              0.09126990288496017,
         | 
| 6 | 
            +
              0.04086871072649956,
         | 
| 7 | 
            +
              -0.149668350815773,
         | 
| 8 | 
            +
              0.2645065188407898,
         | 
| 9 | 
            +
              0.27953410148620605,
         | 
| 10 | 
            +
              0.6700411438941956,
         | 
| 11 | 
            +
              -0.06264923512935638,
         | 
| 12 | 
            +
              0.2915269732475281,
         | 
| 13 | 
            +
              0.12102372199296951,
         | 
| 14 | 
            +
              -0.5578641891479492,
         | 
| 15 | 
            +
              -0.12462181597948074,
         | 
| 16 | 
            +
              0.6190101504325867,
         | 
| 17 | 
            +
              -0.5761605501174927,
         | 
| 18 | 
            +
              -0.084229975938797,
         | 
| 19 | 
            +
              -0.0006869725184515119,
         | 
| 20 | 
            +
              0.49899742007255554,
         | 
| 21 | 
            +
              -0.21737882494926453,
         | 
| 22 | 
            +
              -0.05707789212465286,
         | 
| 23 | 
            +
              -0.18819154798984528,
         | 
| 24 | 
            +
              -0.5531325340270996,
         | 
| 25 | 
            +
              0.22641371190547943,
         | 
| 26 | 
            +
              0.07952054589986801,
         | 
| 27 | 
            +
              0.09851367026567459,
         | 
| 28 | 
            +
              0.03574512526392937,
         | 
| 29 | 
            +
              -0.13013364374637604,
         | 
| 30 | 
            +
              -0.35363155603408813,
         | 
| 31 | 
            +
              0.49086689949035645,
         | 
| 32 | 
            +
              0.08895495533943176,
         | 
| 33 | 
            +
              0.36905843019485474,
         | 
| 34 | 
            +
              -0.10707297921180725,
         | 
| 35 | 
            +
              -0.11953406780958176,
         | 
| 36 | 
            +
              0.043051160871982574,
         | 
| 37 | 
            +
              0.09323996305465698,
         | 
| 38 | 
            +
              -0.16280269622802734,
         | 
| 39 | 
            +
              -0.13945965468883514,
         | 
| 40 | 
            +
              0.2095673531293869,
         | 
| 41 | 
            +
              0.09729334712028503,
         | 
| 42 | 
            +
              0.040950167924165726,
         | 
| 43 | 
            +
              -0.37764972448349,
         | 
| 44 | 
            +
              -0.018613651394844055,
         | 
| 45 | 
            +
              -0.581308901309967,
         | 
| 46 | 
            +
              -0.4080854058265686,
         | 
| 47 | 
            +
              -0.42118221521377563,
         | 
| 48 | 
            +
              1.0161728858947754,
         | 
| 49 | 
            +
              -0.19709929823875427,
         | 
| 50 | 
            +
              -0.024254681542515755,
         | 
| 51 | 
            +
              0.04121233895421028,
         | 
| 52 | 
            +
              -0.15502692759037018,
         | 
| 53 | 
            +
              0.7614311575889587,
         | 
| 54 | 
            +
              -0.6833258271217346,
         | 
| 55 | 
            +
              0.33979618549346924,
         | 
| 56 | 
            +
              0.49055442214012146,
         | 
| 57 | 
            +
              0.011953921988606453,
         | 
| 58 | 
            +
              0.4490082263946533,
         | 
| 59 | 
            +
              0.2667522728443146,
         | 
| 60 | 
            +
              -0.6408993005752563,
         | 
| 61 | 
            +
              -0.17682728171348572,
         | 
| 62 | 
            +
              0.12336420267820358,
         | 
| 63 | 
            +
              0.1474267542362213,
         | 
| 64 | 
            +
              -0.11565382778644562,
         | 
| 65 | 
            +
              0.6467825174331665,
         | 
| 66 | 
            +
              0.10751526057720184,
         | 
| 67 | 
            +
              -0.14141449332237244,
         | 
| 68 | 
            +
              0.6352338194847107,
         | 
| 69 | 
            +
              -0.04154682531952858,
         | 
| 70 | 
            +
              0.12760530412197113,
         | 
| 71 | 
            +
              -0.6243913769721985,
         | 
| 72 | 
            +
              0.08836925774812698,
         | 
| 73 | 
            +
              0.28105032444000244,
         | 
| 74 | 
            +
              -0.15209053456783295,
         | 
| 75 | 
            +
              -0.0037005548365414143,
         | 
| 76 | 
            +
              0.3098902106285095,
         | 
| 77 | 
            +
              0.150644913315773,
         | 
| 78 | 
            +
              0.07396118342876434,
         | 
| 79 | 
            +
              -0.049714382737874985,
         | 
| 80 | 
            +
              -0.5445783138275146,
         | 
| 81 | 
            +
              -0.033714842051267624,
         | 
| 82 | 
            +
              0.1200188472867012,
         | 
| 83 | 
            +
              -0.2312866747379303,
         | 
| 84 | 
            +
              0.20238173007965088,
         | 
| 85 | 
            +
              -0.5392364263534546,
         | 
| 86 | 
            +
              -0.40682801604270935,
         | 
| 87 | 
            +
              -0.16234233975410461,
         | 
| 88 | 
            +
              -0.6470288634300232,
         | 
| 89 | 
            +
              -0.1738162636756897,
         | 
| 90 | 
            +
              0.25936004519462585,
         | 
| 91 | 
            +
              -0.15742169320583344,
         | 
| 92 | 
            +
              0.24468930065631866,
         | 
| 93 | 
            +
              0.13714095950126648,
         | 
| 94 | 
            +
              0.1449803113937378,
         | 
| 95 | 
            +
              0.16882915794849396,
         | 
| 96 | 
            +
              0.19944046437740326,
         | 
| 97 | 
            +
              -0.29332247376441956,
         | 
| 98 | 
            +
              0.0026240404695272446,
         | 
| 99 | 
            +
              0.03341501206159592,
         | 
| 100 | 
            +
              0.01569036766886711,
         | 
| 101 | 
            +
              -0.4688950777053833,
         | 
| 102 | 
            +
              0.09352052956819534,
         | 
| 103 | 
            +
              0.13269393146038055,
         | 
| 104 | 
            +
              0.06116529926657677,
         | 
| 105 | 
            +
              -0.06562789529561996,
         | 
| 106 | 
            +
              -0.23961076140403748,
         | 
| 107 | 
            +
              -0.22402845323085785,
         | 
| 108 | 
            +
              0.47103151679039,
         | 
| 109 | 
            +
              0.0728374496102333,
         | 
| 110 | 
            +
              -0.561316192150116,
         | 
| 111 | 
            +
              0.46127453446388245,
         | 
| 112 | 
            +
              0.15431830286979675,
         | 
| 113 | 
            +
              0.08550310134887695,
         | 
| 114 | 
            +
              -0.03363621234893799,
         | 
| 115 | 
            +
              0.04015417397022247,
         | 
| 116 | 
            +
              -0.014262784272432327,
         | 
| 117 | 
            +
              0.08499719202518463,
         | 
| 118 | 
            +
              -0.39322608709335327,
         | 
| 119 | 
            +
              0.27674373984336853,
         | 
| 120 | 
            +
              0.24571490287780762,
         | 
| 121 | 
            +
              -0.2642858326435089,
         | 
| 122 | 
            +
              -0.7408877015113831,
         | 
| 123 | 
            +
              0.21007885038852692,
         | 
| 124 | 
            +
              0.5898057222366333,
         | 
| 125 | 
            +
              0.14988923072814941,
         | 
| 126 | 
            +
              -0.07782910019159317,
         | 
| 127 | 
            +
              0.4078785479068756,
         | 
| 128 | 
            +
              0.3004123270511627,
         | 
| 129 | 
            +
              0.6256987452507019,
         | 
| 130 | 
            +
              -0.21651767194271088,
         | 
| 131 | 
            +
              -0.17712117731571198,
         | 
| 132 | 
            +
              -0.2749980688095093,
         | 
| 133 | 
            +
              0.4826784133911133,
         | 
| 134 | 
            +
              0.3035520911216736,
         | 
| 135 | 
            +
              0.23235619068145752,
         | 
| 136 | 
            +
              -0.061135340481996536,
         | 
| 137 | 
            +
              0.49035653471946716,
         | 
| 138 | 
            +
              -0.16356635093688965,
         | 
| 139 | 
            +
              -0.35920438170433044,
         | 
| 140 | 
            +
              0.023298246785998344,
         | 
| 141 | 
            +
              0.015880409628152847,
         | 
| 142 | 
            +
              -0.015357445925474167,
         | 
| 143 | 
            +
              -0.3540240228176117,
         | 
| 144 | 
            +
              0.44811102747917175,
         | 
| 145 | 
            +
              -0.05202110856771469,
         | 
| 146 | 
            +
              -0.19488674402236938,
         | 
| 147 | 
            +
              0.4875786602497101,
         | 
| 148 | 
            +
              -0.03857485204935074,
         | 
| 149 | 
            +
              0.463600754737854,
         | 
| 150 | 
            +
              -0.07009128481149673,
         | 
| 151 | 
            +
              0.29871219396591187,
         | 
| 152 | 
            +
              -0.35601672530174255,
         | 
| 153 | 
            +
              0.5102726817131042,
         | 
| 154 | 
            +
              0.3902379274368286,
         | 
| 155 | 
            +
              0.3692609369754791,
         | 
| 156 | 
            +
              -0.35389819741249084,
         | 
| 157 | 
            +
              0.07650414854288101,
         | 
| 158 | 
            +
              -0.63330078125,
         | 
| 159 | 
            +
              0.5580229759216309,
         | 
| 160 | 
            +
              0.10672216862440109,
         | 
| 161 | 
            +
              0.10609150677919388,
         | 
| 162 | 
            +
              0.45468848943710327,
         | 
| 163 | 
            +
              0.15291742980480194,
         | 
| 164 | 
            +
              0.36706316471099854,
         | 
| 165 | 
            +
              -0.2831500768661499,
         | 
| 166 | 
            +
              -0.14291781187057495,
         | 
| 167 | 
            +
              -0.17804013192653656,
         | 
| 168 | 
            +
              -0.5424429178237915,
         | 
| 169 | 
            +
              -0.15468499064445496,
         | 
| 170 | 
            +
              0.07343851029872894,
         | 
| 171 | 
            +
              0.5380398631095886,
         | 
| 172 | 
            +
              0.44494226574897766,
         | 
| 173 | 
            +
              0.9300274848937988,
         | 
| 174 | 
            +
              -0.0274032074958086,
         | 
| 175 | 
            +
              0.3488404154777527,
         | 
| 176 | 
            +
              -0.23694315552711487,
         | 
| 177 | 
            +
              -0.2424279898405075,
         | 
| 178 | 
            +
              -0.04125871881842613,
         | 
| 179 | 
            +
              0.06136211380362511,
         | 
| 180 | 
            +
              -0.5118930339813232,
         | 
| 181 | 
            +
              -0.15055209398269653,
         | 
| 182 | 
            +
              0.45361533761024475,
         | 
| 183 | 
            +
              0.12657225131988525,
         | 
| 184 | 
            +
              0.34210655093193054,
         | 
| 185 | 
            +
              0.313772052526474,
         | 
| 186 | 
            +
              -0.3521589934825897,
         | 
| 187 | 
            +
              0.05892332270741463,
         | 
| 188 | 
            +
              -0.11534406244754791,
         | 
| 189 | 
            +
              0.514985203742981,
         | 
| 190 | 
            +
              0.054903097450733185,
         | 
| 191 | 
            +
              0.18034562468528748,
         | 
| 192 | 
            +
              0.26060545444488525,
         | 
| 193 | 
            +
              -0.29317837953567505,
         | 
| 194 | 
            +
              0.1423174887895584,
         | 
| 195 | 
            +
              0.25360995531082153,
         | 
| 196 | 
            +
              -0.47162681818008423,
         | 
| 197 | 
            +
              0.5438259243965149,
         | 
| 198 | 
            +
              0.02562086470425129,
         | 
| 199 | 
            +
              0.020302919670939445,
         | 
| 200 | 
            +
              0.3039097189903259,
         | 
| 201 | 
            +
              0.19996808469295502,
         | 
| 202 | 
            +
              0.3423006236553192,
         | 
| 203 | 
            +
              0.4524010717868805,
         | 
| 204 | 
            +
              -0.3152591586112976,
         | 
| 205 | 
            +
              -0.60369873046875,
         | 
| 206 | 
            +
              0.16421166062355042,
         | 
| 207 | 
            +
              -0.055804263800382614,
         | 
| 208 | 
            +
              -0.35883089900016785,
         | 
| 209 | 
            +
              0.32918551564216614,
         | 
| 210 | 
            +
              -0.4741072952747345,
         | 
| 211 | 
            +
              0.05971089377999306,
         | 
| 212 | 
            +
              -0.062083590775728226,
         | 
| 213 | 
            +
              0.05729498714208603,
         | 
| 214 | 
            +
              -0.6715519428253174,
         | 
| 215 | 
            +
              0.2646842896938324,
         | 
| 216 | 
            +
              0.14343565702438354,
         | 
| 217 | 
            +
              0.2957288324832916,
         | 
| 218 | 
            +
              0.37478363513946533,
         | 
| 219 | 
            +
              -0.684753954410553,
         | 
| 220 | 
            +
              -0.14382798969745636,
         | 
| 221 | 
            +
              -0.3416562080383301,
         | 
| 222 | 
            +
              0.6120049953460693,
         | 
| 223 | 
            +
              0.24825794994831085,
         | 
| 224 | 
            +
              0.049689218401908875,
         | 
| 225 | 
            +
              0.08789665251970291,
         | 
| 226 | 
            +
              -0.518900454044342,
         | 
| 227 | 
            +
              -0.2226269692182541,
         | 
| 228 | 
            +
              0.17690403759479523,
         | 
| 229 | 
            +
              0.011226996779441833,
         | 
| 230 | 
            +
              0.05879935249686241,
         | 
| 231 | 
            +
              0.03022083267569542,
         | 
| 232 | 
            +
              0.11887083947658539,
         | 
| 233 | 
            +
              0.7854664325714111,
         | 
| 234 | 
            +
              -0.2452417016029358,
         | 
| 235 | 
            +
              0.6136188507080078,
         | 
| 236 | 
            +
              0.5491909384727478,
         | 
| 237 | 
            +
              -0.07412725687026978,
         | 
| 238 | 
            +
              -0.3089025616645813,
         | 
| 239 | 
            +
              0.16618099808692932,
         | 
| 240 | 
            +
              -0.03215228021144867,
         | 
| 241 | 
            +
              0.13637210428714752,
         | 
| 242 | 
            +
              0.10921650379896164,
         | 
| 243 | 
            +
              -0.14989499747753143,
         | 
| 244 | 
            +
              0.6000584959983826,
         | 
| 245 | 
            +
              0.19014132022857666,
         | 
| 246 | 
            +
              -0.007800411432981491,
         | 
| 247 | 
            +
              -0.06849341839551926,
         | 
| 248 | 
            +
              -0.19043166935443878,
         | 
| 249 | 
            +
              -0.012874589301645756,
         | 
| 250 | 
            +
              -0.8398106694221497,
         | 
| 251 | 
            +
              -0.002614892553538084,
         | 
| 252 | 
            +
              -0.26642924547195435,
         | 
| 253 | 
            +
              0.25869783759117126,
         | 
| 254 | 
            +
              -0.46403658390045166,
         | 
| 255 | 
            +
              0.18120701611042023,
         | 
| 256 | 
            +
              0.08567068725824356,
         | 
| 257 | 
            +
              0.08117248862981796
         | 
| 258 | 
            +
            ]
         | 
    	
        Dataset/Embeddings/std.json
    ADDED
    
    | @@ -0,0 +1,258 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [
         | 
| 2 | 
            +
              0.8075656890869141,
         | 
| 3 | 
            +
              0.8826062679290771,
         | 
| 4 | 
            +
              0.8430591821670532,
         | 
| 5 | 
            +
              0.8703321814537048,
         | 
| 6 | 
            +
              0.877600371837616,
         | 
| 7 | 
            +
              0.8111068606376648,
         | 
| 8 | 
            +
              0.8719013929367065,
         | 
| 9 | 
            +
              0.9000007510185242,
         | 
| 10 | 
            +
              0.9740477800369263,
         | 
| 11 | 
            +
              0.8267052173614502,
         | 
| 12 | 
            +
              0.8011612296104431,
         | 
| 13 | 
            +
              0.9747788906097412,
         | 
| 14 | 
            +
              0.8026949763298035,
         | 
| 15 | 
            +
              0.8818342089653015,
         | 
| 16 | 
            +
              0.8605656623840332,
         | 
| 17 | 
            +
              0.8279756903648376,
         | 
| 18 | 
            +
              0.772606611251831,
         | 
| 19 | 
            +
              0.8957112431526184,
         | 
| 20 | 
            +
              0.8716765642166138,
         | 
| 21 | 
            +
              0.7797929644584656,
         | 
| 22 | 
            +
              0.8252673149108887,
         | 
| 23 | 
            +
              0.781441330909729,
         | 
| 24 | 
            +
              0.8043056130409241,
         | 
| 25 | 
            +
              0.877123236656189,
         | 
| 26 | 
            +
              0.9237406849861145,
         | 
| 27 | 
            +
              0.7914682030677795,
         | 
| 28 | 
            +
              0.9089431166648865,
         | 
| 29 | 
            +
              0.8154596090316772,
         | 
| 30 | 
            +
              0.8381725549697876,
         | 
| 31 | 
            +
              0.8573335409164429,
         | 
| 32 | 
            +
              0.7951206564903259,
         | 
| 33 | 
            +
              0.8356125354766846,
         | 
| 34 | 
            +
              0.8639358282089233,
         | 
| 35 | 
            +
              0.8588302135467529,
         | 
| 36 | 
            +
              0.8966045379638672,
         | 
| 37 | 
            +
              0.836276113986969,
         | 
| 38 | 
            +
              0.8558772206306458,
         | 
| 39 | 
            +
              0.8904256820678711,
         | 
| 40 | 
            +
              0.8009889721870422,
         | 
| 41 | 
            +
              0.9030625820159912,
         | 
| 42 | 
            +
              0.8489034175872803,
         | 
| 43 | 
            +
              0.7720499038696289,
         | 
| 44 | 
            +
              0.780423641204834,
         | 
| 45 | 
            +
              0.7854387760162354,
         | 
| 46 | 
            +
              0.8878417611122131,
         | 
| 47 | 
            +
              0.8503796458244324,
         | 
| 48 | 
            +
              0.8932433128356934,
         | 
| 49 | 
            +
              0.9315906763076782,
         | 
| 50 | 
            +
              0.8437496423721313,
         | 
| 51 | 
            +
              0.8389645218849182,
         | 
| 52 | 
            +
              0.8701387643814087,
         | 
| 53 | 
            +
              0.9080750942230225,
         | 
| 54 | 
            +
              1.0714792013168335,
         | 
| 55 | 
            +
              0.8976108431816101,
         | 
| 56 | 
            +
              0.8437362909317017,
         | 
| 57 | 
            +
              0.8633260726928711,
         | 
| 58 | 
            +
              0.8580045700073242,
         | 
| 59 | 
            +
              0.8063361644744873,
         | 
| 60 | 
            +
              0.8105617761611938,
         | 
| 61 | 
            +
              0.8995920419692993,
         | 
| 62 | 
            +
              0.8316185474395752,
         | 
| 63 | 
            +
              0.9079830050468445,
         | 
| 64 | 
            +
              0.8115889430046082,
         | 
| 65 | 
            +
              0.8792805671691895,
         | 
| 66 | 
            +
              0.8858475685119629,
         | 
| 67 | 
            +
              0.7682526111602783,
         | 
| 68 | 
            +
              0.8312106728553772,
         | 
| 69 | 
            +
              0.8296751379966736,
         | 
| 70 | 
            +
              0.9122119545936584,
         | 
| 71 | 
            +
              0.9119444489479065,
         | 
| 72 | 
            +
              0.8761489391326904,
         | 
| 73 | 
            +
              0.8376705646514893,
         | 
| 74 | 
            +
              0.9226043820381165,
         | 
| 75 | 
            +
              0.8830709457397461,
         | 
| 76 | 
            +
              0.819685161113739,
         | 
| 77 | 
            +
              0.9397792816162109,
         | 
| 78 | 
            +
              0.833674967288971,
         | 
| 79 | 
            +
              0.8619604110717773,
         | 
| 80 | 
            +
              0.8484258651733398,
         | 
| 81 | 
            +
              0.943915605545044,
         | 
| 82 | 
            +
              0.8020740151405334,
         | 
| 83 | 
            +
              0.8027610182762146,
         | 
| 84 | 
            +
              0.9116966724395752,
         | 
| 85 | 
            +
              0.8570717573165894,
         | 
| 86 | 
            +
              0.7944185733795166,
         | 
| 87 | 
            +
              0.8977150917053223,
         | 
| 88 | 
            +
              0.9434093236923218,
         | 
| 89 | 
            +
              0.9964787364006042,
         | 
| 90 | 
            +
              0.8149264454841614,
         | 
| 91 | 
            +
              0.8179062604904175,
         | 
| 92 | 
            +
              0.832256555557251,
         | 
| 93 | 
            +
              0.866649329662323,
         | 
| 94 | 
            +
              0.8442603349685669,
         | 
| 95 | 
            +
              0.9397143125534058,
         | 
| 96 | 
            +
              0.8501031398773193,
         | 
| 97 | 
            +
              0.9365203380584717,
         | 
| 98 | 
            +
              0.8380716443061829,
         | 
| 99 | 
            +
              0.8887302279472351,
         | 
| 100 | 
            +
              0.8084500432014465,
         | 
| 101 | 
            +
              0.7769243121147156,
         | 
| 102 | 
            +
              0.8449881076812744,
         | 
| 103 | 
            +
              0.9015783667564392,
         | 
| 104 | 
            +
              0.9295680522918701,
         | 
| 105 | 
            +
              0.8259174227714539,
         | 
| 106 | 
            +
              0.8573725819587708,
         | 
| 107 | 
            +
              0.8600193858146667,
         | 
| 108 | 
            +
              0.8780449032783508,
         | 
| 109 | 
            +
              0.8595342040061951,
         | 
| 110 | 
            +
              0.7720226049423218,
         | 
| 111 | 
            +
              0.816754937171936,
         | 
| 112 | 
            +
              0.8180097937583923,
         | 
| 113 | 
            +
              0.8093970417976379,
         | 
| 114 | 
            +
              0.9032255411148071,
         | 
| 115 | 
            +
              0.8697183728218079,
         | 
| 116 | 
            +
              0.888511061668396,
         | 
| 117 | 
            +
              0.7960647940635681,
         | 
| 118 | 
            +
              0.8589795827865601,
         | 
| 119 | 
            +
              0.8813145160675049,
         | 
| 120 | 
            +
              0.8638142347335815,
         | 
| 121 | 
            +
              0.9093354344367981,
         | 
| 122 | 
            +
              0.8201130628585815,
         | 
| 123 | 
            +
              0.8607465028762817,
         | 
| 124 | 
            +
              0.9925655722618103,
         | 
| 125 | 
            +
              0.9680612683296204,
         | 
| 126 | 
            +
              0.8303309679031372,
         | 
| 127 | 
            +
              0.8515812158584595,
         | 
| 128 | 
            +
              0.8854086399078369,
         | 
| 129 | 
            +
              0.8599415421485901,
         | 
| 130 | 
            +
              0.8196620941162109,
         | 
| 131 | 
            +
              0.9137897491455078,
         | 
| 132 | 
            +
              0.8218133449554443,
         | 
| 133 | 
            +
              0.8703830242156982,
         | 
| 134 | 
            +
              0.845089852809906,
         | 
| 135 | 
            +
              0.8652607202529907,
         | 
| 136 | 
            +
              0.877587080001831,
         | 
| 137 | 
            +
              0.834847629070282,
         | 
| 138 | 
            +
              0.7999405860900879,
         | 
| 139 | 
            +
              0.867475152015686,
         | 
| 140 | 
            +
              0.9779040217399597,
         | 
| 141 | 
            +
              0.8888542652130127,
         | 
| 142 | 
            +
              0.8318555951118469,
         | 
| 143 | 
            +
              0.8721846342086792,
         | 
| 144 | 
            +
              0.8582359552383423,
         | 
| 145 | 
            +
              0.8781721591949463,
         | 
| 146 | 
            +
              0.7750568389892578,
         | 
| 147 | 
            +
              0.9456684589385986,
         | 
| 148 | 
            +
              0.8390375971794128,
         | 
| 149 | 
            +
              0.8528217077255249,
         | 
| 150 | 
            +
              0.9676473736763,
         | 
| 151 | 
            +
              0.9669485092163086,
         | 
| 152 | 
            +
              0.8177183866500854,
         | 
| 153 | 
            +
              0.8109471201896667,
         | 
| 154 | 
            +
              0.8565740585327148,
         | 
| 155 | 
            +
              1.012668490409851,
         | 
| 156 | 
            +
              0.8075276017189026,
         | 
| 157 | 
            +
              0.8120420575141907,
         | 
| 158 | 
            +
              0.8192445039749146,
         | 
| 159 | 
            +
              0.9088258743286133,
         | 
| 160 | 
            +
              0.806582510471344,
         | 
| 161 | 
            +
              0.8778362274169922,
         | 
| 162 | 
            +
              0.9832965135574341,
         | 
| 163 | 
            +
              0.8517345190048218,
         | 
| 164 | 
            +
              0.8954508900642395,
         | 
| 165 | 
            +
              0.8626090288162231,
         | 
| 166 | 
            +
              0.8306634426116943,
         | 
| 167 | 
            +
              0.7902420163154602,
         | 
| 168 | 
            +
              0.8680355548858643,
         | 
| 169 | 
            +
              0.8405691385269165,
         | 
| 170 | 
            +
              0.8080191612243652,
         | 
| 171 | 
            +
              0.8716298937797546,
         | 
| 172 | 
            +
              0.8520878553390503,
         | 
| 173 | 
            +
              0.8133600354194641,
         | 
| 174 | 
            +
              0.9267045855522156,
         | 
| 175 | 
            +
              0.8689888715744019,
         | 
| 176 | 
            +
              0.8166713118553162,
         | 
| 177 | 
            +
              0.8387840390205383,
         | 
| 178 | 
            +
              0.835797131061554,
         | 
| 179 | 
            +
              0.8922353386878967,
         | 
| 180 | 
            +
              0.8736470937728882,
         | 
| 181 | 
            +
              0.9051007032394409,
         | 
| 182 | 
            +
              0.8347994685173035,
         | 
| 183 | 
            +
              0.8269197344779968,
         | 
| 184 | 
            +
              0.7968848943710327,
         | 
| 185 | 
            +
              0.8677981495857239,
         | 
| 186 | 
            +
              0.8539698719978333,
         | 
| 187 | 
            +
              0.9122839570045471,
         | 
| 188 | 
            +
              0.907562255859375,
         | 
| 189 | 
            +
              0.908149242401123,
         | 
| 190 | 
            +
              0.8897758722305298,
         | 
| 191 | 
            +
              0.8776298761367798,
         | 
| 192 | 
            +
              0.8702916502952576,
         | 
| 193 | 
            +
              0.7712435722351074,
         | 
| 194 | 
            +
              0.8737289905548096,
         | 
| 195 | 
            +
              1.003007411956787,
         | 
| 196 | 
            +
              0.9195813536643982,
         | 
| 197 | 
            +
              0.9373644590377808,
         | 
| 198 | 
            +
              0.8549340963363647,
         | 
| 199 | 
            +
              0.8885018229484558,
         | 
| 200 | 
            +
              0.8555989265441895,
         | 
| 201 | 
            +
              0.8315033316612244,
         | 
| 202 | 
            +
              0.8457157611846924,
         | 
| 203 | 
            +
              0.8452540636062622,
         | 
| 204 | 
            +
              0.9597710967063904,
         | 
| 205 | 
            +
              0.8279005885124207,
         | 
| 206 | 
            +
              0.9954813122749329,
         | 
| 207 | 
            +
              0.8817158937454224,
         | 
| 208 | 
            +
              0.8564739227294922,
         | 
| 209 | 
            +
              0.8737724423408508,
         | 
| 210 | 
            +
              0.8833761215209961,
         | 
| 211 | 
            +
              0.9069574475288391,
         | 
| 212 | 
            +
              0.8549059629440308,
         | 
| 213 | 
            +
              0.8478658199310303,
         | 
| 214 | 
            +
              0.8306840062141418,
         | 
| 215 | 
            +
              0.8308926820755005,
         | 
| 216 | 
            +
              0.8582388162612915,
         | 
| 217 | 
            +
              0.7912089228630066,
         | 
| 218 | 
            +
              0.843919038772583,
         | 
| 219 | 
            +
              0.8585576415061951,
         | 
| 220 | 
            +
              0.850679337978363,
         | 
| 221 | 
            +
              0.921983003616333,
         | 
| 222 | 
            +
              0.8164607882499695,
         | 
| 223 | 
            +
              0.8369028568267822,
         | 
| 224 | 
            +
              0.7947129607200623,
         | 
| 225 | 
            +
              0.8371235132217407,
         | 
| 226 | 
            +
              0.8269281387329102,
         | 
| 227 | 
            +
              0.8633431196212769,
         | 
| 228 | 
            +
              0.9147580862045288,
         | 
| 229 | 
            +
              0.9019842743873596,
         | 
| 230 | 
            +
              0.8293289542198181,
         | 
| 231 | 
            +
              0.8421900868415833,
         | 
| 232 | 
            +
              0.8144598603248596,
         | 
| 233 | 
            +
              0.9013247489929199,
         | 
| 234 | 
            +
              0.7653704285621643,
         | 
| 235 | 
            +
              0.8295224905014038,
         | 
| 236 | 
            +
              0.9549149870872498,
         | 
| 237 | 
            +
              0.8671613931655884,
         | 
| 238 | 
            +
              0.8507492542266846,
         | 
| 239 | 
            +
              0.8559182286262512,
         | 
| 240 | 
            +
              0.839141309261322,
         | 
| 241 | 
            +
              0.918213427066803,
         | 
| 242 | 
            +
              0.9064037203788757,
         | 
| 243 | 
            +
              0.8579128980636597,
         | 
| 244 | 
            +
              0.8337833881378174,
         | 
| 245 | 
            +
              0.9374175071716309,
         | 
| 246 | 
            +
              0.9142330884933472,
         | 
| 247 | 
            +
              0.7878691554069519,
         | 
| 248 | 
            +
              0.8651018142700195,
         | 
| 249 | 
            +
              0.8595719933509827,
         | 
| 250 | 
            +
              0.8955603837966919,
         | 
| 251 | 
            +
              0.9085484743118286,
         | 
| 252 | 
            +
              0.8001472353935242,
         | 
| 253 | 
            +
              0.7812052369117737,
         | 
| 254 | 
            +
              0.8475046157836914,
         | 
| 255 | 
            +
              0.8226194381713867,
         | 
| 256 | 
            +
              0.8940064311027527,
         | 
| 257 | 
            +
              0.9277697801589966
         | 
| 258 | 
            +
            ]
         | 
    	
        Dataset/dataset.yaml
    ADDED
    
    | @@ -0,0 +1,67 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            dataset:
         | 
| 2 | 
            +
              '7190_90542_000054_000000':
         | 
| 3 | 
            +
                speaker_id: '7190'
         | 
| 4 | 
            +
                example_id: '7190_90542_000054_000000'
         | 
| 5 | 
            +
              '4830_25904_000008_000001':
         | 
| 6 | 
            +
                speaker_id: '4830'
         | 
| 7 | 
            +
                example_id: '4830_25904_000008_000001'
         | 
| 8 | 
            +
              '8820_294120_000011_000001':
         | 
| 9 | 
            +
                speaker_id: '8820'
         | 
| 10 | 
            +
                example_id: '8820_294120_000011_000001'
         | 
| 11 | 
            +
              '3009_10327_000027_000005':
         | 
| 12 | 
            +
                speaker_id: '3009'
         | 
| 13 | 
            +
                example_id: '3009_10327_000027_000005'
         | 
| 14 | 
            +
              '7226_86965_000020_000001':
         | 
| 15 | 
            +
                speaker_id: '7226'
         | 
| 16 | 
            +
                example_id: '7226_86965_000020_000001'
         | 
| 17 | 
            +
              '329_861_000024_000003':
         | 
| 18 | 
            +
                speaker_id: '329'
         | 
| 19 | 
            +
                example_id: '329_861_000024_000003'
         | 
| 20 | 
            +
              '5802_76044_000038_000000':
         | 
| 21 | 
            +
                speaker_id: '5802'
         | 
| 22 | 
            +
                example_id: '5802_76044_000038_000000'
         | 
| 23 | 
            +
              '1535_141644_000004_000001':
         | 
| 24 | 
            +
                speaker_id: '1535'
         | 
| 25 | 
            +
                example_id: '1535_141644_000004_000001'
         | 
| 26 | 
            +
              '7011_66622_000032_000002':
         | 
| 27 | 
            +
                speaker_id: '7011'
         | 
| 28 | 
            +
                example_id: '7011_66622_000032_000002'
         | 
| 29 | 
            +
              '8758_296465_000020_000000':
         | 
| 30 | 
            +
                speaker_id: '8758'
         | 
| 31 | 
            +
                example_id: '8758_296465_000020_000000'
         | 
| 32 | 
            +
              '1034_121119_000028_000001':
         | 
| 33 | 
            +
                speaker_id: '1034'
         | 
| 34 | 
            +
                example_id: '1034_121119_000028_000001'
         | 
| 35 | 
            +
              '4957_30119_000070_000001':
         | 
| 36 | 
            +
                speaker_id: '4957'
         | 
| 37 | 
            +
                example_id: '4957_30119_000070_000001'
         | 
| 38 | 
            +
              '83_9960_000017_000003':
         | 
| 39 | 
            +
                speaker_id: '83'
         | 
| 40 | 
            +
                example_id: '83_9960_000017_000003'
         | 
| 41 | 
            +
              '7059_77897_000017_000001':
         | 
| 42 | 
            +
                speaker_id: '7059'
         | 
| 43 | 
            +
                example_id: '7059_77897_000017_000001'
         | 
| 44 | 
            +
              '1731_142320_000122_000005':
         | 
| 45 | 
            +
                speaker_id: '1731'
         | 
| 46 | 
            +
                example_id: '1731_142320_000122_000005'
         | 
| 47 | 
            +
              '6918_47541_000006_000008':
         | 
| 48 | 
            +
                speaker_id: '6918'
         | 
| 49 | 
            +
                example_id: '6918_47541_000006_000008'
         | 
| 50 | 
            +
              '6544_71420_000024_000001':
         | 
| 51 | 
            +
                speaker_id: '6544'
         | 
| 52 | 
            +
                example_id: '6544_71420_000024_000001'
         | 
| 53 | 
            +
              '7245_104888_000016_000000':
         | 
| 54 | 
            +
                speaker_id: '7245'
         | 
| 55 | 
            +
                example_id: '7245_104888_000016_000000'
         | 
| 56 | 
            +
              '5012_80192_000020_000003':
         | 
| 57 | 
            +
                speaker_id: '5012'
         | 
| 58 | 
            +
                example_id: '5012_80192_000020_000003'
         | 
| 59 | 
            +
              '1422_149735_000006_000000':
         | 
| 60 | 
            +
                speaker_id: '1422'
         | 
| 61 | 
            +
                example_id: '1422_149735_000006_000000'
         | 
| 62 | 
            +
              '14_212_000019_000000':
         | 
| 63 | 
            +
                speaker_id: '14'
         | 
| 64 | 
            +
                example_id: '14_212_000019_000000'
         | 
| 65 | 
            +
              '1088_129236_000006_000007':
         | 
| 66 | 
            +
                speaker_id: '1088'
         | 
| 67 | 
            +
                example_id: '1088_129236_000006_000007'
         | 
    	
        app.py
    CHANGED
    
    | @@ -1,31 +1,39 @@ | |
| 1 | 
             
            import numpy as np 
         | 
| 2 | 
             
            from pathlib import Path
         | 
| 3 | 
            -
            import padertorch as pt
         | 
| 4 | 
             
            import paderbox as pb
         | 
| 5 | 
            -
            import time
         | 
| 6 | 
             
            import torch
         | 
| 7 | 
            -
            import torchaudio
         | 
| 8 | 
             
            from onnxruntime import InferenceSession
         | 
| 9 | 
             
            from pvq_manipulation.models.vits import Vits_NT
         | 
| 10 | 
             
            from pvq_manipulation.models.ffjord import FFJORD
         | 
| 11 | 
            -
            from IPython.display import display, Audio, clear_output
         | 
| 12 | 
             
            from pvq_manipulation.models.hubert import HubertExtractor, SID_LARGE_LAYER
         | 
| 13 | 
             
            import librosa
         | 
| 14 | 
             
            from pvq_manipulation.helper.vad import EnergyVAD
         | 
| 15 | 
             
            import gradio as gr
         | 
|  | |
| 16 |  | 
| 17 | 
            -
            device = ' | 
|  | |
| 18 |  | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 22 |  | 
| 23 | 
             
            # load normalizing flow
         | 
| 24 | 
             
            storage_dir_normalizing_flow = Path("./models/norm_flow")
         | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
             
            normalizing_flow = FFJORD.load_model(storage_dir_normalizing_flow, checkpoint="model.pt", device=device)
         | 
| 28 |  | 
|  | |
|  | |
|  | |
|  | |
| 29 | 
             
            # load hubert features model
         | 
| 30 | 
             
            hubert_model = HubertExtractor(
         | 
| 31 | 
             
                layer=SID_LARGE_LAYER,
         | 
| @@ -35,140 +43,157 @@ hubert_model = HubertExtractor( | |
| 35 | 
             
                # storage_dir= # target storage dir hubert model
         | 
| 36 | 
             
            )
         | 
| 37 |  | 
| 38 | 
            -
            # example synthesis
         | 
| 39 | 
            -
            # speaker_id = 1034
         | 
| 40 | 
            -
            # example_id = "1034_121119_000028_000001"
         | 
| 41 | 
            -
             | 
| 42 | 
            -
            # wav_1 = tts_model.synthesize_from_example({
         | 
| 43 | 
            -
            #     'text' : "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", 
         | 
| 44 | 
            -
            #     'd_vector_storage_root': f"./Saved_models/Dataset/Embeddings/{speaker_id}/{example_id}.pth"
         | 
| 45 | 
            -
            # })
         | 
| 46 | 
            -
            # display(Audio(wav_1, rate=24_000, normalize=True))
         | 
| 47 |  | 
| 48 | 
            -
            # manipulation block
         | 
| 49 | 
             
            def get_manipulation(
         | 
| 50 | 
            -
                 | 
| 51 | 
             
                labels,
         | 
| 52 | 
            -
                flow, | 
| 53 | 
             
                tts_model,
         | 
|  | |
|  | |
| 54 | 
             
                manipulation_idx=0,
         | 
| 55 | 
             
                manipulation_fkt=1,
         | 
| 56 | 
             
            ):
         | 
| 57 | 
             
                labels_manipulated = labels.clone()
         | 
| 58 | 
            -
                labels_manipulated[:,manipulation_idx] += manipulation_fkt
         | 
| 59 | 
            -
             | 
| 60 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 61 | 
             
                sampled_class_manipulated = flow.sample((output_forward, labels_manipulated))[0]
         | 
| 62 |  | 
|  | |
|  | |
|  | |
| 63 | 
             
                wav = tts_model.synthesize_from_example({
         | 
| 64 | 
            -
                    'text':  | 
| 65 | 
             
                    'd_vector': d_vector.detach().numpy(),
         | 
| 66 | 
             
                    'd_vector_man': sampled_class_manipulated.detach().numpy(),
         | 
| 67 | 
            -
             | 
|  | |
| 68 | 
             
                return wav
         | 
| 69 |  | 
| 70 | 
            -
            def extract_speaker_embedding(example):
         | 
| 71 | 
            -
                observation, sr = pb.io.load_audio(example['audio_path']['observation'], return_sample_rate=True)
         | 
| 72 | 
            -
                observation = librosa.resample(observation, orig_sr=sr, target_sr=16_000)
         | 
| 73 | 
            -
                
         | 
| 74 | 
            -
                vad = EnergyVAD(sample_rate=16_000)
         | 
| 75 | 
            -
                if observation.ndim == 1:
         | 
| 76 | 
            -
                    observation = observation[None, :]
         | 
| 77 | 
            -
                
         | 
| 78 | 
            -
                observation = vad({'audio_data': observation})['audio_data']
         | 
| 79 | 
            -
                
         | 
| 80 | 
            -
                with torch.no_grad():
         | 
| 81 | 
            -
                    example = tts_model.speaker_manager.prepare_example({'audio_data': {'observation': observation}, **example})
         | 
| 82 | 
            -
                    example = pt.data.utils.collate_fn([example])
         | 
| 83 | 
            -
                    example['features'] = torch.tensor(np.array(example['features']))
         | 
| 84 | 
            -
                    d_vector = tts_model.speaker_manager.forward(example)[0]
         | 
| 85 | 
            -
                return d_vector
         | 
| 86 | 
            -
             | 
| 87 | 
            -
            # load speaker labels
         | 
| 88 | 
            -
            def load_speaker_labels(example, speaker_conditioning, reg_stor_dir=Path('./models/pvq_extractor/')):
         | 
| 89 | 
            -
                audio, _ = torchaudio.load(example['audio_path']['observation'])
         | 
| 90 | 
            -
                audio = audio.to(device)
         | 
| 91 | 
            -
                num_samples = torch.tensor([audio.shape[-1]], device=device)
         | 
| 92 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 93 | 
             
                providers = ["CPUExecutionProvider"]
         | 
| 94 |  | 
| 95 | 
             
                with torch.no_grad():
         | 
| 96 | 
             
                    features, seq_len = hubert_model(
         | 
| 97 | 
            -
                         | 
| 98 | 
            -
                         | 
| 99 | 
             
                        sequence_lengths=num_samples,
         | 
| 100 | 
             
                    )
         | 
| 101 | 
             
                    features = np.mean(features.squeeze(0).detach().cpu().numpy(), axis=-1)
         | 
| 102 | 
            -
             | 
| 103 | 
             
                    pvqd_predictions = {}
         | 
| 104 | 
            -
                    for pvq in  | 
| 105 | 
             
                        with open(reg_stor_dir / f"{pvq}.onnx", "rb") as fid:
         | 
| 106 | 
             
                            onnx = fid.read()
         | 
| 107 | 
             
                        sess = InferenceSession(onnx, providers=providers)
         | 
| 108 | 
             
                        pred = sess.run(None, {"X": features[None]})[0].squeeze(1)
         | 
| 109 | 
             
                        pvqd_predictions[pvq] = pred.tolist()[0]
         | 
| 110 | 
            -
                labels = []
         | 
| 111 | 
            -
                for key in speaker_conditioning:
         | 
| 112 | 
            -
                    labels.append(pvqd_predictions[key]/100)
         | 
| 113 | 
            -
                return torch.tensor(labels)
         | 
| 114 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 115 |  | 
| 116 | 
            -
            example = {
         | 
| 117 | 
            -
                'audio_path': {'observation': "audio/1034_121119_000028_000001.wav"},
         | 
| 118 | 
            -
                'speaker_id': 1034,
         | 
| 119 | 
            -
                'example_id': "1034_121119_000028_000001",
         | 
| 120 | 
            -
            }
         | 
| 121 |  | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 |  | 
| 125 | 
            -
             | 
| 126 | 
            -
            # max_len = max(len(name) for name in label_options)  
         | 
| 127 | 
            -
            # for label_name, pvq in zip(label_options, labels):
         | 
| 128 | 
            -
                # print(f'{label_name:<{max_len}} : {pvq:6.2f}')
         | 
| 129 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 130 |  | 
| 131 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 132 |  | 
| 133 | 
            -
                d_vector = extract_speaker_embedding(example)
         | 
| 134 | 
            -
                labels = load_speaker_labels(example, speaker_conditioning)
         | 
| 135 | 
            -
                
         | 
| 136 | 
             
                wav_manipulated = get_manipulation(
         | 
| 137 | 
            -
                     | 
| 138 | 
            -
                    d_vector= | 
| 139 | 
            -
                    labels= | 
| 140 | 
             
                    flow=normalizing_flow,
         | 
| 141 | 
             
                    tts_model=tts_model,
         | 
| 142 | 
             
                    manipulation_idx=manipulation_idx,
         | 
| 143 | 
             
                    manipulation_fkt=manipulation_fkt,
         | 
|  | |
| 144 | 
             
                )
         | 
| 145 | 
            -
                
         | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 148 | 
            -
                    'd_vector': d_vector.detach().numpy(),
         | 
| 149 | 
            -
                })
         | 
| 150 | 
            -
                sr = 24_000
         | 
| 151 | 
            -
                return (sr, wav_unmanipulated), (sr, wav_manipulated)
         | 
| 152 | 
            -
                
         | 
| 153 | 
            -
                # with audio_output:
         | 
| 154 | 
            -
                #     clear_output(wait=True) 
         | 
| 155 | 
            -
                #     print('Manipulated Speaker')
         | 
| 156 | 
            -
                #     display(Audio(wav_manipulated, rate=24_000, normalize=True))
         | 
| 157 | 
            -
                #     print('Unmanipulated Synthese')
         | 
| 158 | 
            -
                #     display(Audio(wav_unmanipulated, rate=24_000, normalize=True))
         | 
| 159 | 
            -
                #     print('Original Speaker')
         | 
| 160 | 
            -
                #     display(Audio(example['audio_path']['observation'], rate=24_000, normalize=True))
         | 
| 161 | 
            -
             | 
| 162 | 
            -
                # print(f"Manipulated {label_options[manipulation_idx]} with strength {manipulation_fkt}")
         | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
            dropdown_options = [(label, i) for i, label in enumerate(label_options)]
         | 
| 166 | 
             
            demo = gr.Interface(
         | 
| 167 | 
             
                title="Perceptual Voice Quality (PVQ) Manipulation",
         | 
| 168 | 
             
                fn=update_manipulation,
         | 
| 169 | 
             
                inputs=[
         | 
| 170 | 
            -
                    gr.Dropdown( | 
| 171 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 172 | 
             
                ],
         | 
| 173 | 
             
                outputs=[gr.Audio(label="original utterance"), gr.Audio(label="manipulated utterance")],
         | 
| 174 | 
             
            )
         | 
|  | |
| 1 | 
             
            import numpy as np 
         | 
| 2 | 
             
            from pathlib import Path
         | 
|  | |
| 3 | 
             
            import paderbox as pb
         | 
|  | |
| 4 | 
             
            import torch
         | 
|  | |
| 5 | 
             
            from onnxruntime import InferenceSession
         | 
| 6 | 
             
            from pvq_manipulation.models.vits import Vits_NT
         | 
| 7 | 
             
            from pvq_manipulation.models.ffjord import FFJORD
         | 
|  | |
| 8 | 
             
            from pvq_manipulation.models.hubert import HubertExtractor, SID_LARGE_LAYER
         | 
| 9 | 
             
            import librosa
         | 
| 10 | 
             
            from pvq_manipulation.helper.vad import EnergyVAD
         | 
| 11 | 
             
            import gradio as gr
         | 
| 12 | 
            +
            from pvq_manipulation.helper.creapy_wrapper import process_file
         | 
| 13 |  | 
| 14 | 
            +
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
         | 
| 15 | 
            +
            pvq_labels = ['Weight', 'Resonance', 'Breathiness', 'Roughness', 'Loudness', 'Strain', 'Pitch']
         | 
| 16 |  | 
| 17 | 
            +
            dataset_dict = pb.io.load_yaml('./Dataset/dataset.yaml')
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            cached_example_id = None
         | 
| 20 | 
            +
            cached_loaded_example = None
         | 
| 21 | 
            +
            cached_labels = None
         | 
| 22 | 
            +
            cached_d_vector = None
         | 
| 23 | 
            +
            cached_unmanipulated = None
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            # path to stats
         | 
| 26 | 
            +
            stats_path = Path('./Dataset/Embeddings/')
         | 
| 27 |  | 
| 28 | 
             
            # load normalizing flow
         | 
| 29 | 
             
            storage_dir_normalizing_flow = Path("./models/norm_flow")
         | 
| 30 | 
            +
            config_norm_flow = pb.io.load_yaml(storage_dir_normalizing_flow / "config.json")
         | 
|  | |
| 31 | 
             
            normalizing_flow = FFJORD.load_model(storage_dir_normalizing_flow, checkpoint="model.pt", device=device)
         | 
| 32 |  | 
| 33 | 
            +
            # load tts model
         | 
| 34 | 
            +
            storage_dir_tts = Path("./models/tts_model/")
         | 
| 35 | 
            +
            tts_model = Vits_NT.load_model(storage_dir_tts, "model.pt")
         | 
| 36 | 
            +
             | 
| 37 | 
             
            # load hubert features model
         | 
| 38 | 
             
            hubert_model = HubertExtractor(
         | 
| 39 | 
             
                layer=SID_LARGE_LAYER,
         | 
|  | |
| 43 | 
             
                # storage_dir= # target storage dir hubert model
         | 
| 44 | 
             
            )
         | 
| 45 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 46 |  | 
|  | |
| 47 | 
             
            def get_manipulation(
         | 
| 48 | 
            +
                example,
         | 
| 49 | 
             
                labels,
         | 
| 50 | 
            +
                flow,
         | 
| 51 | 
             
                tts_model,
         | 
| 52 | 
            +
                d_vector,
         | 
| 53 | 
            +
                config_norm_flow,
         | 
| 54 | 
             
                manipulation_idx=0,
         | 
| 55 | 
             
                manipulation_fkt=1,
         | 
| 56 | 
             
            ):
         | 
| 57 | 
             
                labels_manipulated = labels.clone()
         | 
| 58 | 
            +
                labels_manipulated[:, manipulation_idx] += manipulation_fkt
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                if config_norm_flow['flag_remove_mean']:
         | 
| 61 | 
            +
                    global_mean = pb.io.load(stats_path / "mean.json")
         | 
| 62 | 
            +
                    global_mean = torch.tensor(global_mean, dtype=torch.float32)
         | 
| 63 | 
            +
                    speaker_embedding_norm = (d_vector - global_mean)
         | 
| 64 | 
            +
                    global_std = pb.io.load(stats_path / "std.json")
         | 
| 65 | 
            +
                    global_std = torch.tensor(global_std, dtype=torch.float32)
         | 
| 66 | 
            +
                    speaker_embedding_norm = speaker_embedding_norm / global_std
         | 
| 67 | 
            +
                else:
         | 
| 68 | 
            +
                    speaker_embedding_norm = d_vector
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                output_forward = flow.forward((speaker_embedding_norm.float(), labels))[0]
         | 
| 71 | 
             
                sampled_class_manipulated = flow.sample((output_forward, labels_manipulated))[0]
         | 
| 72 |  | 
| 73 | 
            +
                if config_norm_flow['flag_remove_mean']:
         | 
| 74 | 
            +
                    sampled_class_manipulated = (sampled_class_manipulated * global_std + global_mean)
         | 
| 75 | 
            +
             | 
| 76 | 
             
                wav = tts_model.synthesize_from_example({
         | 
| 77 | 
            +
                    'text': example['transcription'],
         | 
| 78 | 
             
                    'd_vector': d_vector.detach().numpy(),
         | 
| 79 | 
             
                    'd_vector_man': sampled_class_manipulated.detach().numpy(),
         | 
| 80 | 
            +
                    'd_vector_storage_root': example['d_vector_storage_root'],
         | 
| 81 | 
            +
                })
         | 
| 82 | 
             
                return wav
         | 
| 83 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 84 |  | 
| 85 | 
            +
            def get_creak_label(example):
         | 
| 86 | 
            +
                audio_data = example['loaded_audio_data']['16_000']
         | 
| 87 | 
            +
                test, y_pred, included_indices = process_file(audio_data)
         | 
| 88 | 
            +
                mean_creak = np.mean(y_pred[included_indices])
         | 
| 89 | 
            +
                return mean_creak * 100
         | 
| 90 | 
            +
             | 
| 91 | 
            +
             | 
| 92 | 
            +
            def load_speaker_labels(example, reg_stor_dir=Path('./models/pvq_extractor/')):
         | 
| 93 | 
            +
                audio_data = torch.tensor(example['loaded_audio_data']['16_000'], dtype=torch.float)[None, :]
         | 
| 94 | 
            +
                num_samples = torch.tensor([audio_data.shape[-1]])
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                if torch.cuda.is_available():
         | 
| 97 | 
            +
                    audio_data = audio_data.cuda()
         | 
| 98 | 
            +
                    num_samples = num_samples.cuda()
         | 
| 99 | 
             
                providers = ["CPUExecutionProvider"]
         | 
| 100 |  | 
| 101 | 
             
                with torch.no_grad():
         | 
| 102 | 
             
                    features, seq_len = hubert_model(
         | 
| 103 | 
            +
                        audio_data,
         | 
| 104 | 
            +
                        16_000,
         | 
| 105 | 
             
                        sequence_lengths=num_samples,
         | 
| 106 | 
             
                    )
         | 
| 107 | 
             
                    features = np.mean(features.squeeze(0).detach().cpu().numpy(), axis=-1)
         | 
|  | |
| 108 | 
             
                    pvqd_predictions = {}
         | 
| 109 | 
            +
                    for pvq in pvq_labels:
         | 
| 110 | 
             
                        with open(reg_stor_dir / f"{pvq}.onnx", "rb") as fid:
         | 
| 111 | 
             
                            onnx = fid.read()
         | 
| 112 | 
             
                        sess = InferenceSession(onnx, providers=providers)
         | 
| 113 | 
             
                        pred = sess.run(None, {"X": features[None]})[0].squeeze(1)
         | 
| 114 | 
             
                        pvqd_predictions[pvq] = pred.tolist()[0]
         | 
|  | |
|  | |
|  | |
|  | |
| 115 |  | 
| 116 | 
            +
                pvqd_predictions['Creak_mean'] = get_creak_label(example)
         | 
| 117 | 
            +
                labels = [pvqd_predictions[key] / 100 for key in pvq_labels + ["Creak_mean"]]
         | 
| 118 | 
            +
                return torch.tensor(labels, device=device).float()
         | 
| 119 | 
            +
             | 
| 120 | 
            +
             | 
| 121 | 
            +
            def load_audio_files(example):
         | 
| 122 | 
            +
                observation_loaded, sr = pb.io.load_audio(example['audio_path']['observation'], return_sample_rate=True)
         | 
| 123 | 
            +
             | 
| 124 | 
            +
                example['loaded_audio_data'] = {}
         | 
| 125 | 
            +
                observation = librosa.resample(observation_loaded, orig_sr=sr, target_sr=16_000)
         | 
| 126 | 
            +
             | 
| 127 | 
            +
                vad = EnergyVAD(sample_rate=16_000)
         | 
| 128 | 
            +
                if observation.ndim == 1:
         | 
| 129 | 
            +
                    observation = observation[None, :]
         | 
| 130 | 
            +
             | 
| 131 | 
            +
                observation = vad({'audio_data': observation})['audio_data']
         | 
| 132 | 
            +
                example['loaded_audio_data']['16_000'] = observation
         | 
| 133 | 
            +
             | 
| 134 | 
            +
                observation = librosa.resample(observation, orig_sr=sr, target_sr=24_000)
         | 
| 135 | 
            +
                vad = EnergyVAD(sample_rate=24_000)
         | 
| 136 | 
            +
                if observation.ndim == 1:
         | 
| 137 | 
            +
                    observation = observation[None, :]
         | 
| 138 | 
            +
                observation = vad({'audio_data': observation})['audio_data']
         | 
| 139 | 
            +
                example['loaded_audio_data']['24_000'] = observation
         | 
| 140 | 
            +
                return example
         | 
| 141 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 142 |  | 
| 143 | 
            +
            def update_manipulation(manipulation_idx, example_id, transcription, manipulation_fkt):
         | 
| 144 | 
            +
                global cached_example_id, cached_loaded_example, cached_labels, cached_d_vector, example_database, cached_unmanipulated
         | 
| 145 |  | 
| 146 | 
            +
                speaker_id = dataset_dict['dataset'][example_id]['speaker_id']
         | 
|  | |
|  | |
|  | |
| 147 |  | 
| 148 | 
            +
                example = {
         | 
| 149 | 
            +
                    'audio_path': {'observation': f"./Dataset/Audio_files/{example_id}.wav"},
         | 
| 150 | 
            +
                    'd_vector_storage_root': f"./Saved_models/Dataset/Embeddings/{speaker_id}/{example_id}.pth",
         | 
| 151 | 
            +
                    'speaker_id': speaker_id,
         | 
| 152 | 
            +
                    'example_id': example_id,
         | 
| 153 | 
            +
                    'transcription': transcription
         | 
| 154 | 
            +
                }
         | 
| 155 |  | 
| 156 | 
            +
                if cached_example_id != example_id:
         | 
| 157 | 
            +
                    cached_loaded_example = load_audio_files(example)
         | 
| 158 | 
            +
                    cached_d_vector = torch.load(f"./Dataset/Embeddings/{speaker_id}/{example_id}.pth")
         | 
| 159 | 
            +
                    cached_labels = load_speaker_labels(example)
         | 
| 160 | 
            +
                    cached_example_id = example_id
         | 
| 161 | 
            +
                    cached_unmanipulated = tts_model.synthesize_from_example({
         | 
| 162 | 
            +
                        'text': transcription,
         | 
| 163 | 
            +
                        'd_vector': cached_d_vector.detach().numpy(),
         | 
| 164 | 
            +
                    })
         | 
| 165 |  | 
|  | |
|  | |
|  | |
| 166 | 
             
                wav_manipulated = get_manipulation(
         | 
| 167 | 
            +
                    example=example,
         | 
| 168 | 
            +
                    d_vector=cached_d_vector,
         | 
| 169 | 
            +
                    labels=cached_labels[None, :],
         | 
| 170 | 
             
                    flow=normalizing_flow,
         | 
| 171 | 
             
                    tts_model=tts_model,
         | 
| 172 | 
             
                    manipulation_idx=manipulation_idx,
         | 
| 173 | 
             
                    manipulation_fkt=manipulation_fkt,
         | 
| 174 | 
            +
                    config_norm_flow=config_norm_flow,
         | 
| 175 | 
             
                )
         | 
| 176 | 
            +
                return (24_000, cached_unmanipulated), (24_000, wav_manipulated)
         | 
| 177 | 
            +
             | 
| 178 | 
            +
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 179 | 
             
            demo = gr.Interface(
         | 
| 180 | 
             
                title="Perceptual Voice Quality (PVQ) Manipulation",
         | 
| 181 | 
             
                fn=update_manipulation,
         | 
| 182 | 
             
                inputs=[
         | 
| 183 | 
            +
                    gr.Dropdown(
         | 
| 184 | 
            +
                        label="PVQ Feature",
         | 
| 185 | 
            +
                        choices=[('Weight', 0), ('Resonance', 1), ('Breathiness', 2), ('Roughness', 3), ('Creak', 7)],
         | 
| 186 | 
            +
                        value=2, type="value"
         | 
| 187 | 
            +
                    ),
         | 
| 188 | 
            +
                    gr.Dropdown(
         | 
| 189 | 
            +
                        choices=dataset_dict['dataset'].keys(),
         | 
| 190 | 
            +
                        value='1422_149735_000006_000000', type="value"
         | 
| 191 | 
            +
                    ),
         | 
| 192 | 
            +
                    gr.Textbox(
         | 
| 193 | 
            +
                        value="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
         | 
| 194 | 
            +
                        placeholder='Type something'
         | 
| 195 | 
            +
                    ),
         | 
| 196 | 
            +
                    gr.Slider(label="Manipulation Factor", minimum=-1.0, maximum=2.0, value=1.0, step=0.1),
         | 
| 197 | 
             
                ],
         | 
| 198 | 
             
                outputs=[gr.Audio(label="original utterance"), gr.Audio(label="manipulated utterance")],
         | 
| 199 | 
             
            )
         | 
    	
        models/norm_flow/config.json
    CHANGED
    
    | @@ -1,12 +1,15 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
              " | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 5 | 
            -
                " | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
                   | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
            }
         | 
|  | |
|  | |
|  | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
            +
              "model":{
         | 
| 3 | 
            +
                "factory": "pvq_manipulation.models.ffjord.FFJORD",
         | 
| 4 | 
            +
                "normalize": true,
         | 
| 5 | 
            +
                "ode_function": {
         | 
| 6 | 
            +
                  "condition_dim": 8,
         | 
| 7 | 
            +
                  "factory": "pvq_manipulation.models.ode_functions.CNFNN",
         | 
| 8 | 
            +
                  "hidden_channels": [
         | 
| 9 | 
            +
                    512
         | 
| 10 | 
            +
                  ],
         | 
| 11 | 
            +
                  "input_dim": 256
         | 
| 12 | 
            +
                }
         | 
| 13 | 
            +
              },
         | 
| 14 | 
            +
              "flag_remove_mean": true
         | 
| 15 | 
            +
            }
         | 
