@@ -31,16 +31,13 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.TreeMap;
 
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
 import org.apache.carbondata.core.datastore.compression.CompressorFactory;
-import org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
-import org.apache.carbondata.core.datastore.filesystem.S3CarbonFile;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.index.Segment;
 import org.apache.carbondata.core.indexstore.BlockMetaInfo;
@@ -61,9 +58,6 @@
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
 import org.apache.commons.io.FilenameUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
 import org.apache.log4j.Logger;
 
 public class BlockletIndexUtil {
@@ -79,8 +73,7 @@ public static Set<TableBlockIndexUniqueIdentifier> getSegmentUniqueIdentifiers(S
 
   public static Map<String, BlockMetaInfo> getBlockMetaInfoMap(
       TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
-      SegmentIndexFileStore indexFileStore, Set<String> filesRead,
-      Map<String, BlockMetaInfo> fileNameToMetaInfoMapping, List<DataFileFooter> indexInfos)
+      SegmentIndexFileStore indexFileStore, Set<String> filesRead, List<DataFileFooter> indexInfos)
       throws IOException {
     boolean isTransactionalTable = true;
     TableBlockIndexUniqueIdentifier identifier =
@@ -130,8 +123,7 @@ public static Map<String, BlockMetaInfo> getBlockMetaInfoMap(
       }
       String blockPath = footer.getBlockInfo().getFilePath();
       if (null == blockMetaInfoMap.get(blockPath)) {
-        BlockMetaInfo blockMetaInfo = createBlockMetaInfo(
-            fileNameToMetaInfoMapping, footer.getBlockInfo());
+        BlockMetaInfo blockMetaInfo = createBlockMetaInfo(footer.getBlockInfo());
        // if blockMetaInfo is null that means the file has been deleted from the file system.
        // This can happen in case IUD scenarios where after deleting or updating the data the
        // complete block is deleted but the entry still exists in index or merge index file
@@ -143,38 +135,7 @@ public static Map<String, BlockMetaInfo> getBlockMetaInfoMap(
     return blockMetaInfoMap;
   }
 
-  /**
-   * This method will create file name to block Meta Info Mapping. This method will reduce the
-   * number of nameNode calls and using this method one namenode will fetch 1000 entries
-   *
-   * @param segmentFilePath
-   * @return
-   * @throws IOException
-   */
-  public static Map<String, BlockMetaInfo> createCarbonDataFileBlockMetaInfoMapping(
-      String segmentFilePath, Configuration configuration) throws IOException {
-    Map<String, BlockMetaInfo> fileNameToMetaInfoMapping = new TreeMap();
-    CarbonFile carbonFile = FileFactory.getCarbonFile(segmentFilePath, configuration);
-    if (carbonFile instanceof AbstractDFSCarbonFile && !(carbonFile instanceof S3CarbonFile)) {
-      PathFilter pathFilter = new PathFilter() {
-        @Override
-        public boolean accept(Path path) {
-          return CarbonTablePath.isCarbonDataFile(path.getName());
-        }
-      };
-      CarbonFile[] carbonFiles = carbonFile.locationAwareListFiles(pathFilter);
-      for (CarbonFile file : carbonFiles) {
-        String[] location = file.getLocations();
-        long len = file.getSize();
-        BlockMetaInfo blockMetaInfo = new BlockMetaInfo(location, len);
-        fileNameToMetaInfoMapping.put(file.getPath(), blockMetaInfo);
-      }
-    }
-    return fileNameToMetaInfoMapping;
-  }
-
-  private static BlockMetaInfo createBlockMetaInfo(
-      Map<String, BlockMetaInfo> fileNameToMetaInfoMapping, TableBlockInfo blockInfo)
+  private static BlockMetaInfo createBlockMetaInfo(TableBlockInfo blockInfo)
       throws IOException {
     String carbonDataFile = blockInfo.getFilePath();
     FileFactory.FileType fileType = FileFactory.getFileType(carbonDataFile);
@@ -193,7 +154,18 @@ private static BlockMetaInfo createBlockMetaInfo(
         CarbonFile carbonFile = FileFactory.getCarbonFile(carbonDataFile);
         return new BlockMetaInfo(new String[] { "localhost" }, carbonFile.getSize());
       default:
-        return fileNameToMetaInfoMapping.get(FileFactory.getFormattedPath(carbonDataFile));
+        // Here, it gets carbondata file path from footer and creates BlockMetaInfo for each file.
+        // It creates BlockMetaInfo only for valid files.
+        // Example: Assume a single partition with 1000 carbondata files.
+        // Perform 1st update: adds 900 new carbondata files to same folder.
+        // Perform 2nd update (same update query): adds another 900 carbondata files.
+        // Now the files added by 1st update are invalid.
+        // Perform query: Creates BlockMetaInfo for valid identifiers.
+        if (!FileFactory.isFileExist(carbonDataFile)) {
+          return null;
+        }
+        CarbonFile file = FileFactory.getCarbonFile(FileFactory.getFormattedPath(carbonDataFile));
+        return new BlockMetaInfo(file.getLocations(), file.getSize());
     }
   }
 
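Reviewer note (not part of the patch): the sketch below is a minimal, self-contained illustration of the lookup strategy the new default branch uses, relying only on the FileFactory/CarbonFile/BlockMetaInfo calls visible in the hunks above; the class name BlockMetaInfoSketch and the sample path are hypothetical. It checks that the carbondata file still exists (it may have been invalidated by an update/delete) and builds the BlockMetaInfo from the file's locations and size, instead of consulting the removed pre-built fileNameToMetaInfoMapping.

import java.io.IOException;

import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.indexstore.BlockMetaInfo;

public final class BlockMetaInfoSketch {

  // Mirrors the new default branch of createBlockMetaInfo: existence check first,
  // then a direct per-file read of block locations and size.
  static BlockMetaInfo resolve(String carbonDataFilePath) throws IOException {
    if (!FileFactory.isFileExist(carbonDataFilePath)) {
      // Stale entry left behind by an IUD operation; the caller skips null entries.
      return null;
    }
    CarbonFile file = FileFactory.getCarbonFile(FileFactory.getFormattedPath(carbonDataFilePath));
    return new BlockMetaInfo(file.getLocations(), file.getSize());
  }

  public static void main(String[] args) throws IOException {
    // Hypothetical path, for illustration only.
    BlockMetaInfo info = resolve("/tmp/store/db/tbl/Fact/Part0/Segment_0/part-0-0.carbondata");
    System.out.println(info == null ? "file no longer exists; block is skipped" : "block metadata resolved");
  }
}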